Objects/stringobject.c

   1 /* String (str/bytes) object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6 #include <ctype.h>
   7
   8 #ifdef COUNT_ALLOCS
   9 int null_strings, one_strings;
  10 #endif
  11
  12 static PyStringObject *characters[UCHAR_MAX + 1];  /* shared one-character strings, indexed by byte value; slots are filled lazily */
  13 static PyStringObject *nullstring;                 /* shared empty string; NULL until the first empty string is created */
  14
  15 /* This dictionary holds all interned strings.  Note that references to
  16    strings in this dictionary are *not* counted in the string's ob_refcnt.
  17    When the interned string reaches a refcnt of 0 the string deallocation
  18    function will delete the reference from this dictionary.
  19
  20    Another way to look at this is to say that the actual reference
  21    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  22 */
  23 static PyObject *interned;  /* the interned-string dictionary described above; string_dealloc() deletes dying mortal strings from it */
  24
  25 /*
  26    For both PyString_FromString() and PyString_FromStringAndSize(), the
  27    parameter `size' denotes number of characters to allocate, not counting any
  28    null terminating character.
  29
  30    For PyString_FromString(), the parameter `str' points to a null-terminated
  31    string containing exactly `size' bytes.
  32
  33    For PyString_FromStringAndSize(), the parameter `str' is
  34    either NULL or else points to a string containing at least `size' bytes.
  35    For PyString_FromStringAndSize(), the string in the `str' parameter does
  36    not have to be null-terminated.  (Therefore it is safe to construct a
  37    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  38    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  39    bytes (setting the last byte to the null terminating character) and you can
  40    fill in the data yourself.  If `str' is non-NULL then the resulting
  41    PyString object must be treated as immutable and you must not fill in nor
  42    alter the data yourself, since the strings may be shared.
  43
  44    The PyObject member `op->ob_size', which denotes the number of "extra
  45    items" in a variable-size object, will contain the number of bytes
  46    allocated for string data, not counting the null terminating character.  It
  47    is therefore equal to the `size' parameter (for
  48    PyString_FromStringAndSize()) or the length of the string in the `str'
  49    parameter (for PyString_FromString()).
  50 */
  51 PyObject *
  52 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  53 {
  54         register PyStringObject *op;
  55         if (size < 0) {
  56                 PyErr_SetString(PyExc_SystemError,
  57                     "Negative size passed to PyString_FromStringAndSize");
  58                 return NULL;
  59         }
  60         if (size == 0 && (op = nullstring) != NULL) {
  61 #ifdef COUNT_ALLOCS
  62                 null_strings++;
  63 #endif
  64                 Py_INCREF(op);
  65                 return (PyObject *)op;
  66         }
  67         if (size == 1 && str != NULL &&
  68             (op = characters[*str & UCHAR_MAX]) != NULL)
  69         {
  70 #ifdef COUNT_ALLOCS
  71                 one_strings++;
  72 #endif
  73                 Py_INCREF(op);
  74                 return (PyObject *)op;
  75         }
  76
  77         if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
  78                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  79                 return NULL;
  80         }
  81
  82         /* Inline PyObject_NewVar */
  83         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
  84         if (op == NULL)
  85                 return PyErr_NoMemory();
  86         PyObject_INIT_VAR(op, &PyString_Type, size);
  87         op->ob_shash = -1;
  88         op->ob_sstate = SSTATE_NOT_INTERNED;
  89         if (str != NULL)
  90                 Py_MEMCPY(op->ob_sval, str, size);
  91         op->ob_sval[size] = '\0';
  92         /* share short strings */
  93         if (size == 0) {
  94                 PyObject *t = (PyObject *)op;
  95                 PyString_InternInPlace(&t);
  96                 op = (PyStringObject *)t;
  97                 nullstring = op;
  98                 Py_INCREF(op);
  99         } else if (size == 1 && str != NULL) {
 100                 PyObject *t = (PyObject *)op;
 101                 PyString_InternInPlace(&t);
 102                 op = (PyStringObject *)t;
 103                 characters[*str & UCHAR_MAX] = op;
 104                 Py_INCREF(op);
 105         }
 106         return (PyObject *) op;
 107 }
 108
 109 PyObject *
 110 PyString_FromString(const char *str)
 111 {
 112         register size_t size;
 113         register PyStringObject *op;
 114
 115         assert(str != NULL);
 116         size = strlen(str);
 117         if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
 118                 PyErr_SetString(PyExc_OverflowError,
 119                         "string is too long for a Python string");
 120                 return NULL;
 121         }
 122         if (size == 0 && (op = nullstring) != NULL) {
 123 #ifdef COUNT_ALLOCS
 124                 null_strings++;
 125 #endif
 126                 Py_INCREF(op);
 127                 return (PyObject *)op;
 128         }
 129         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 130 #ifdef COUNT_ALLOCS
 131                 one_strings++;
 132 #endif
 133                 Py_INCREF(op);
 134                 return (PyObject *)op;
 135         }
 136
 137         /* Inline PyObject_NewVar */
 138         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 139         if (op == NULL)
 140                 return PyErr_NoMemory();
 141         PyObject_INIT_VAR(op, &PyString_Type, size);
 142         op->ob_shash = -1;
 143         op->ob_sstate = SSTATE_NOT_INTERNED;
 144         Py_MEMCPY(op->ob_sval, str, size+1);
 145         /* share short strings */
 146         if (size == 0) {
 147                 PyObject *t = (PyObject *)op;
 148                 PyString_InternInPlace(&t);
 149                 op = (PyStringObject *)t;
 150                 nullstring = op;
 151                 Py_INCREF(op);
 152         } else if (size == 1) {
 153                 PyObject *t = (PyObject *)op;
 154                 PyString_InternInPlace(&t);
 155                 op = (PyStringObject *)t;
 156                 characters[*str & UCHAR_MAX] = op;
 157                 Py_INCREF(op);
 158         }
 159         return (PyObject *) op;
 160 }
 161
 162 PyObject *
 163 PyString_FromFormatV(const char *format, va_list vargs)
 164 {
 165         va_list count;
 166         Py_ssize_t n = 0;
 167         const char* f;
 168         char *s;
 169         PyObject* string;
 170
 171 #ifdef VA_LIST_IS_ARRAY
 172         Py_MEMCPY(count, vargs, sizeof(va_list));
 173 #else
 174 #ifdef  __va_copy
 175         __va_copy(count, vargs);
 176 #else
 177         count = vargs;
 178 #endif
 179 #endif
 180         /* step 1: figure out how large a buffer we need */
 181         for (f = format; *f; f++) {
 182                 if (*f == '%') {
 183                         const char* p = f;
 184                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 185                                 ;
 186
 187                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 188                          * they don't affect the amount of space we reserve.
 189                          */
 190                         if ((*f == 'l' || *f == 'z') &&
 191                                         (f[1] == 'd' || f[1] == 'u'))
 192                                 ++f;
 193
 194                         switch (*f) {
 195                         case 'c':
 196                                 (void)va_arg(count, int);
 197                                 /* fall through... */
 198                         case '%':
 199                                 n++;
 200                                 break;
 201                         case 'd': case 'u': case 'i': case 'x':
 202                                 (void) va_arg(count, int);
 203                                 /* 20 bytes is enough to hold a 64-bit
 204                                    integer.  Decimal takes the most space.
 205                                    This isn't enough for octal. */
 206                                 n += 20;
 207                                 break;
 208                         case 's':
 209                                 s = va_arg(count, char*);
 210                                 n += strlen(s);
 211                                 break;
 212                         case 'p':
 213                                 (void) va_arg(count, int);
 214                                 /* maximum 64-bit pointer representation:
 215                                  * 0xffffffffffffffff
 216                                  * so 19 characters is enough.
 217                                  * XXX I count 18 -- what's the extra for?
 218                                  */
 219                                 n += 19;
 220                                 break;
 221                         default:
 222                                 /* if we stumble upon an unknown
 223                                    formatting code, copy the rest of
 224                                    the format string to the output
 225                                    string. (we cannot just skip the
 226                                    code, since there's no way to know
 227                                    what's in the argument list) */
 228                                 n += strlen(p);
 229                                 goto expand;
 230                         }
 231                 } else
 232                         n++;
 233         }
 234  expand:
 235         /* step 2: fill the buffer */
 236         /* Since we've analyzed how much space we need for the worst case,
 237            use sprintf directly instead of the slower PyOS_snprintf. */
 238         string = PyString_FromStringAndSize(NULL, n);
 239         if (!string)
 240                 return NULL;
 241
 242         s = PyString_AsString(string);
 243
 244         for (f = format; *f; f++) {
 245                 if (*f == '%') {
 246                         const char* p = f++;
 247                         Py_ssize_t i;
 248                         int longflag = 0;
 249                         int size_tflag = 0;
 250                         /* parse the width.precision part (we're only
 251                            interested in the precision value, if any) */
 252                         n = 0;
 253                         while (isdigit(Py_CHARMASK(*f)))
 254                                 n = (n*10) + *f++ - '0';
 255                         if (*f == '.') {
 256                                 f++;
 257                                 n = 0;
 258                                 while (isdigit(Py_CHARMASK(*f)))
 259                                         n = (n*10) + *f++ - '0';
 260                         }
 261                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 262                                 f++;
 263                         /* handle the long flag, but only for %ld and %lu.
 264                            others can be added when necessary. */
 265                         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
 266                                 longflag = 1;
 267                                 ++f;
 268                         }
 269                         /* handle the size_t flag. */
 270                         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 271                                 size_tflag = 1;
 272                                 ++f;
 273                         }
 274
 275                         switch (*f) {
 276                         case 'c':
 277                                 *s++ = va_arg(vargs, int);
 278                                 break;
 279                         case 'd':
 280                                 if (longflag)
 281                                         sprintf(s, "%ld", va_arg(vargs, long));
 282                                 else if (size_tflag)
 283                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 284                                                 va_arg(vargs, Py_ssize_t));
 285                                 else
 286                                         sprintf(s, "%d", va_arg(vargs, int));
 287                                 s += strlen(s);
 288                                 break;
 289                         case 'u':
 290                                 if (longflag)
 291                                         sprintf(s, "%lu",
 292                                                 va_arg(vargs, unsigned long));
 293                                 else if (size_tflag)
 294                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 295                                                 va_arg(vargs, size_t));
 296                                 else
 297                                         sprintf(s, "%u",
 298                                                 va_arg(vargs, unsigned int));
 299                                 s += strlen(s);
 300                                 break;
 301                         case 'i':
 302                                 sprintf(s, "%i", va_arg(vargs, int));
 303                                 s += strlen(s);
 304                                 break;
 305                         case 'x':
 306                                 sprintf(s, "%x", va_arg(vargs, int));
 307                                 s += strlen(s);
 308                                 break;
 309                         case 's':
 310                                 p = va_arg(vargs, char*);
 311                                 i = strlen(p);
 312                                 if (n > 0 && i > n)
 313                                         i = n;
 314                                 Py_MEMCPY(s, p, i);
 315                                 s += i;
 316                                 break;
 317                         case 'p':
 318                                 sprintf(s, "%p", va_arg(vargs, void*));
 319                                 /* %p is ill-defined:  ensure leading 0x. */
 320                                 if (s[1] == 'X')
 321                                         s[1] = 'x';
 322                                 else if (s[1] != 'x') {
 323                                         memmove(s+2, s, strlen(s)+1);
 324                                         s[0] = '0';
 325                                         s[1] = 'x';
 326                                 }
 327                                 s += strlen(s);
 328                                 break;
 329                         case '%':
 330                                 *s++ = '%';
 331                                 break;
 332                         default:
 333                                 strcpy(s, p);
 334                                 s += strlen(s);
 335                                 goto end;
 336                         }
 337                 } else
 338                         *s++ = *f;
 339         }
 340
 341  end:
 342         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 343         return string;
 344 }
 345
 346 PyObject *
 347 PyString_FromFormat(const char *format, ...)
 348 {
 349         PyObject* ret;
 350         va_list vargs;
 351
 352 #ifdef HAVE_STDARG_PROTOTYPES
 353         va_start(vargs, format);
 354 #else
 355         va_start(vargs);
 356 #endif
 357         ret = PyString_FromFormatV(format, vargs);
 358         va_end(vargs);
 359         return ret;
 360 }
 361
 362
 363 PyObject *PyString_Decode(const char *s,
 364                           Py_ssize_t size,
 365                           const char *encoding,
 366                           const char *errors)
 367 {
 368     PyObject *v, *str;
 369
 370     str = PyString_FromStringAndSize(s, size);
 371     if (str == NULL)
 372         return NULL;
 373     v = PyString_AsDecodedString(str, encoding, errors);
 374     Py_DECREF(str);
 375     return v;
 376 }
 377
 378 PyObject *PyString_AsDecodedObject(PyObject *str,
 379                                    const char *encoding,
 380                                    const char *errors)
 381 {
 382     PyObject *v;
 383
 384     if (!PyString_Check(str)) {
 385         PyErr_BadArgument();
 386         goto onError;
 387     }
 388
 389     if (encoding == NULL) {
 390 #ifdef Py_USING_UNICODE
 391         encoding = PyUnicode_GetDefaultEncoding();
 392 #else
 393         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 394         goto onError;
 395 #endif
 396     }
 397
 398     /* Decode via the codec registry */
 399     v = PyCodec_Decode(str, encoding, errors);
 400     if (v == NULL)
 401         goto onError;
 402
 403     return v;
 404
 405  onError:
 406     return NULL;
 407 }
 408
 409 PyObject *PyString_AsDecodedString(PyObject *str,
 410                                    const char *encoding,
 411                                    const char *errors)
 412 {
 413     PyObject *v;
 414
 415     v = PyString_AsDecodedObject(str, encoding, errors);
 416     if (v == NULL)
 417         goto onError;
 418
 419 #ifdef Py_USING_UNICODE
 420     /* Convert Unicode to a string using the default encoding */
 421     if (PyUnicode_Check(v)) {
 422         PyObject *temp = v;
 423         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 424         Py_DECREF(temp);
 425         if (v == NULL)
 426             goto onError;
 427     }
 428 #endif
 429     if (!PyString_Check(v)) {
 430         PyErr_Format(PyExc_TypeError,
 431                      "decoder did not return a string object (type=%.400s)",
 432                      Py_TYPE(v)->tp_name);
 433         Py_DECREF(v);
 434         goto onError;
 435     }
 436
 437     return v;
 438
 439  onError:
 440     return NULL;
 441 }
 442
 443 PyObject *PyString_Encode(const char *s,
 444                           Py_ssize_t size,
 445                           const char *encoding,
 446                           const char *errors)
 447 {
 448     PyObject *v, *str;
 449
 450     str = PyString_FromStringAndSize(s, size);
 451     if (str == NULL)
 452         return NULL;
 453     v = PyString_AsEncodedString(str, encoding, errors);
 454     Py_DECREF(str);
 455     return v;
 456 }
 457
 458 PyObject *PyString_AsEncodedObject(PyObject *str,
 459                                    const char *encoding,
 460                                    const char *errors)
 461 {
 462     PyObject *v;
 463
 464     if (!PyString_Check(str)) {
 465         PyErr_BadArgument();
 466         goto onError;
 467     }
 468
 469     if (encoding == NULL) {
 470 #ifdef Py_USING_UNICODE
 471         encoding = PyUnicode_GetDefaultEncoding();
 472 #else
 473         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 474         goto onError;
 475 #endif
 476     }
 477
 478     /* Encode via the codec registry */
 479     v = PyCodec_Encode(str, encoding, errors);
 480     if (v == NULL)
 481         goto onError;
 482
 483     return v;
 484
 485  onError:
 486     return NULL;
 487 }
 488
 489 PyObject *PyString_AsEncodedString(PyObject *str,
 490                                    const char *encoding,
 491                                    const char *errors)
 492 {
 493     PyObject *v;
 494
 495     v = PyString_AsEncodedObject(str, encoding, errors);
 496     if (v == NULL)
 497         goto onError;
 498
 499 #ifdef Py_USING_UNICODE
 500     /* Convert Unicode to a string using the default encoding */
 501     if (PyUnicode_Check(v)) {
 502         PyObject *temp = v;
 503         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 504         Py_DECREF(temp);
 505         if (v == NULL)
 506             goto onError;
 507     }
 508 #endif
 509     if (!PyString_Check(v)) {
 510         PyErr_Format(PyExc_TypeError,
 511                      "encoder did not return a string object (type=%.400s)",
 512                      Py_TYPE(v)->tp_name);
 513         Py_DECREF(v);
 514         goto onError;
 515     }
 516
 517     return v;
 518
 519  onError:
 520     return NULL;
 521 }
 522
 523 static void
 524 string_dealloc(PyObject *op)
 525 {
 526         switch (PyString_CHECK_INTERNED(op)) {
 527                 case SSTATE_NOT_INTERNED:
 528                         break;
 529
 530                 case SSTATE_INTERNED_MORTAL:
 531                         /* revive dead object temporarily for DelItem */
 532                         Py_REFCNT(op) = 3;
 533                         if (PyDict_DelItem(interned, op) != 0)
 534                                 Py_FatalError(
 535                                         "deletion of interned string failed");
 536                         break;
 537
 538                 case SSTATE_INTERNED_IMMORTAL:
 539                         Py_FatalError("Immortal interned string died.");
 540
 541                 default:
 542                         Py_FatalError("Inconsistent interned string state.");
 543         }
 544         Py_TYPE(op)->tp_free(op);
 545 }
 546
 547 /* Unescape a backslash-escaped string. If unicode is non-zero,
 548    the string is a u-literal. If recode_encoding is non-zero,
 549    the string is UTF-8 encoded and should be re-encoded in the
 550    specified encoding.  */
 551
 552 PyObject *PyString_DecodeEscape(const char *s,
 553                                 Py_ssize_t len,
 554                                 const char *errors,
 555                                 Py_ssize_t unicode,
 556                                 const char *recode_encoding)
 557 {
 558         int c;
 559         char *p, *buf;
 560         const char *end;
 561         PyObject *v;
 562         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 563         v = PyString_FromStringAndSize((char *)NULL, newlen);
 564         if (v == NULL)
 565                 return NULL;
 566         p = buf = PyString_AsString(v);
 567         end = s + len;
 568         while (s < end) {
 569                 if (*s != '\\') {
 570                   non_esc:
 571 #ifdef Py_USING_UNICODE
 572                         if (recode_encoding && (*s & 0x80)) {
 573                                 PyObject *u, *w;
 574                                 char *r;
 575                                 const char* t;
 576                                 Py_ssize_t rn;
 577                                 t = s;
 578                                 /* Decode non-ASCII bytes as UTF-8. */
 579                                 while (t < end && (*t & 0x80)) t++;
 580                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 581                                 if(!u) goto failed;
 582
 583                                 /* Recode them in target encoding. */
 584                                 w = PyUnicode_AsEncodedString(
 585                                         u, recode_encoding, errors);
 586                                 Py_DECREF(u);
 587                                 if (!w) goto failed;
 588
 589                                 /* Append bytes to output buffer. */
 590                                 assert(PyString_Check(w));
 591                                 r = PyString_AS_STRING(w);
 592                                 rn = PyString_GET_SIZE(w);
 593                                 Py_MEMCPY(p, r, rn);
 594                                 p += rn;
 595                                 Py_DECREF(w);
 596                                 s = t;
 597                         } else {
 598                                 *p++ = *s++;
 599                         }
 600 #else
 601                         *p++ = *s++;
 602 #endif
 603                         continue;
 604                 }
 605                 s++;
 606                 if (s==end) {
 607                         PyErr_SetString(PyExc_ValueError,
 608                                         "Trailing \\ in string");
 609                         goto failed;
 610                 }
 611                 switch (*s++) {
 612                 /* XXX This assumes ASCII! */
 613                 case '\n': break;
 614                 case '\\': *p++ = '\\'; break;
 615                 case '\'': *p++ = '\''; break;
 616                 case '\"': *p++ = '\"'; break;
 617                 case 'b': *p++ = '\b'; break;
 618                 case 'f': *p++ = '\014'; break; /* FF */
 619                 case 't': *p++ = '\t'; break;
 620                 case 'n': *p++ = '\n'; break;
 621                 case 'r': *p++ = '\r'; break;
 622                 case 'v': *p++ = '\013'; break; /* VT */
 623                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 624                 case '0': case '1': case '2': case '3':
 625                 case '4': case '5': case '6': case '7':
 626                         c = s[-1] - '0';
 627                         if (s < end && '0' <= *s && *s <= '7') {
 628                                 c = (c<<3) + *s++ - '0';
 629                                 if (s < end && '0' <= *s && *s <= '7')
 630                                         c = (c<<3) + *s++ - '0';
 631                         }
 632                         *p++ = c;
 633                         break;
 634                 case 'x':
 635                         if (s+1 < end &&
 636                             isxdigit(Py_CHARMASK(s[0])) &&
 637                             isxdigit(Py_CHARMASK(s[1])))
 638                         {
 639                                 unsigned int x = 0;
 640                                 c = Py_CHARMASK(*s);
 641                                 s++;
 642                                 if (isdigit(c))
 643                                         x = c - '0';
 644                                 else if (islower(c))
 645                                         x = 10 + c - 'a';
 646                                 else
 647                                         x = 10 + c - 'A';
 648                                 x = x << 4;
 649                                 c = Py_CHARMASK(*s);
 650                                 s++;
 651                                 if (isdigit(c))
 652                                         x += c - '0';
 653                                 else if (islower(c))
 654                                         x += 10 + c - 'a';
 655                                 else
 656                                         x += 10 + c - 'A';
 657                                 *p++ = x;
 658                                 break;
 659                         }
 660                         if (!errors || strcmp(errors, "strict") == 0) {
 661                                 PyErr_SetString(PyExc_ValueError,
 662                                                 "invalid \\x escape");
 663                                 goto failed;
 664                         }
 665                         if (strcmp(errors, "replace") == 0) {
 666                                 *p++ = '?';
 667                         } else if (strcmp(errors, "ignore") == 0)
 668                                 /* do nothing */;
 669                         else {
 670                                 PyErr_Format(PyExc_ValueError,
 671                                              "decoding error; "
 672                                              "unknown error handling code: %.400s",
 673                                              errors);
 674                                 goto failed;
 675                         }
 676 #ifndef Py_USING_UNICODE
 677                 case 'u':
 678                 case 'U':
 679                 case 'N':
 680                         if (unicode) {
 681                                 PyErr_SetString(PyExc_ValueError,
 682                                           "Unicode escapes not legal "
 683                                           "when Unicode disabled");
 684                                 goto failed;
 685                         }
 686 #endif
 687                 default:
 688                         *p++ = '\\';
 689                         s--;
 690                         goto non_esc; /* an arbitry number of unescaped
 691                                          UTF-8 bytes may follow. */
 692                 }
 693         }
 694         if (p-buf < newlen)
 695                 _PyString_Resize(&v, p - buf);
 696         return v;
 697   failed:
 698         Py_DECREF(v);
 699         return NULL;
 700 }
 701
 702 /* -------------------------------------------------------------------- */
 703 /* object api */
 704
 705 static Py_ssize_t
 706 string_getsize(register PyObject *op)
 707 {
 708         char *s;
 709         Py_ssize_t len;
 710         if (PyString_AsStringAndSize(op, &s, &len))
 711                 return -1;
 712         return len;
 713 }
 714
 715 static /*const*/ char *
 716 string_getbuffer(register PyObject *op)
 717 {
 718         char *s;
 719         Py_ssize_t len;
 720         if (PyString_AsStringAndSize(op, &s, &len))
 721                 return NULL;
 722         return s;
 723 }
 724
 725 Py_ssize_t
 726 PyString_Size(register PyObject *op)
 727 {
 728         if (!PyString_Check(op))
 729                 return string_getsize(op);
 730         return Py_SIZE(op);
 731 }
 732
 733 /*const*/ char *
 734 PyString_AsString(register PyObject *op)
 735 {
 736         if (!PyString_Check(op))
 737                 return string_getbuffer(op);
 738         return ((PyStringObject *)op) -> ob_sval;
 739 }
 740
 741 int
 742 PyString_AsStringAndSize(register PyObject *obj,
 743                          register char **s,
 744                          register Py_ssize_t *len)
 745 {
 746         if (s == NULL) {
 747                 PyErr_BadInternalCall();
 748                 return -1;
 749         }
 750
 751         if (!PyString_Check(obj)) {
 752 #ifdef Py_USING_UNICODE
 753                 if (PyUnicode_Check(obj)) {
 754                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 755                         if (obj == NULL)
 756                                 return -1;
 757                 }
 758                 else
 759 #endif
 760                 {
 761                         PyErr_Format(PyExc_TypeError,
 762                                      "expected string or Unicode object, "
 763                                      "%.200s found", Py_TYPE(obj)->tp_name);
 764                         return -1;
 765                 }
 766         }
 767
 768         *s = PyString_AS_STRING(obj);
 769         if (len != NULL)
 770                 *len = PyString_GET_SIZE(obj);
 771         else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 772                 PyErr_SetString(PyExc_TypeError,
 773                                 "expected string without null bytes");
 774                 return -1;
 775         }
 776         return 0;
 777 }
 778
 779 /* -------------------------------------------------------------------- */
 780 /* Methods */
 781
 782 #include "stringlib/stringdefs.h"
 783 #include "stringlib/fastsearch.h"
 784
 785 #include "stringlib/count.h"
 786 #include "stringlib/find.h"
 787 #include "stringlib/partition.h"
 788
 789 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
 790 #include "stringlib/localeutil.h"
 791
 792
 793
 794 static int
 795 string_print(PyStringObject *op, FILE *fp, int flags)
 796 {
 797         Py_ssize_t i, str_len;
 798         char c;
 799         int quote;
 800
 801         /* XXX Ought to check for interrupts when writing long strings */
 802         if (! PyString_CheckExact(op)) {
 803                 int ret;
 804                 /* A str subclass may have its own __str__ method. */
 805                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 806                 if (op == NULL)
 807                         return -1;
 808                 ret = string_print(op, fp, flags);
 809                 Py_DECREF(op);
 810                 return ret;
 811         }
 812         if (flags & Py_PRINT_RAW) {
 813                 char *data = op->ob_sval;
 814                 Py_ssize_t size = Py_SIZE(op);
 815                 Py_BEGIN_ALLOW_THREADS
 816                 while (size > INT_MAX) {
 817                         /* Very long strings cannot be written atomically.
 818                          * But don't write exactly INT_MAX bytes at a time
 819                          * to avoid memory aligment issues.
 820                          */
 821                         const int chunk_size = INT_MAX & ~0x3FFF;
 822                         fwrite(data, 1, chunk_size, fp);
 823                         data += chunk_size;
 824                         size -= chunk_size;
 825                 }
 826 #ifdef __VMS
 827                 if (size) fwrite(data, (int)size, 1, fp);
 828 #else
 829                 fwrite(data, 1, (int)size, fp);
 830 #endif
 831                 Py_END_ALLOW_THREADS
 832                 return 0;
 833         }
 834
 835         /* figure out which quote to use; single is preferred */
 836         quote = '\'';
 837         if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 838             !memchr(op->ob_sval, '"', Py_SIZE(op)))
 839                 quote = '"';
 840
 841         str_len = Py_SIZE(op);
 842         Py_BEGIN_ALLOW_THREADS
 843         fputc(quote, fp);
 844         for (i = 0; i < str_len; i++) {
 845                 /* Since strings are immutable and the caller should have a
 846                 reference, accessing the interal buffer should not be an issue
 847                 with the GIL released. */
 848                 c = op->ob_sval[i];
 849                 if (c == quote || c == '\\')
 850                         fprintf(fp, "\\%c", c);
 851                 else if (c == '\t')
 852                         fprintf(fp, "\\t");
 853                 else if (c == '\n')
 854                         fprintf(fp, "\\n");
 855                 else if (c == '\r')
 856                         fprintf(fp, "\\r");
 857                 else if (c < ' ' || c >= 0x7f)
 858                         fprintf(fp, "\\x%02x", c & 0xff);
 859                 else
 860                         fputc(c, fp);
 861         }
 862         fputc(quote, fp);
 863         Py_END_ALLOW_THREADS
 864         return 0;
 865 }
 866
 867 PyObject *
 868 PyString_Repr(PyObject *obj, int smartquotes)
 869 {
 870         register PyStringObject* op = (PyStringObject*) obj;
 871         size_t newsize = 2 + 4 * Py_SIZE(op);
 872         PyObject *v;
 873         if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
 874                 PyErr_SetString(PyExc_OverflowError,
 875                         "string is too large to make repr");
 876                 return NULL;
 877         }
 878         v = PyString_FromStringAndSize((char *)NULL, newsize);
 879         if (v == NULL) {
 880                 return NULL;
 881         }
 882         else {
 883                 register Py_ssize_t i;
 884                 register char c;
 885                 register char *p;
 886                 int quote;
 887
 888                 /* figure out which quote to use; single is preferred */
 889                 quote = '\'';
 890                 if (smartquotes &&
 891                     memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 892                     !memchr(op->ob_sval, '"', Py_SIZE(op)))
 893                         quote = '"';
 894
 895                 p = PyString_AS_STRING(v);
 896                 *p++ = quote;
 897                 for (i = 0; i < Py_SIZE(op); i++) {
 898                         /* There's at least enough room for a hex escape
 899                            and a closing quote. */
 900                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 901                         c = op->ob_sval[i];
 902                         if (c == quote || c == '\\')
 903                                 *p++ = '\\', *p++ = c;
 904                         else if (c == '\t')
 905                                 *p++ = '\\', *p++ = 't';
 906                         else if (c == '\n')
 907                                 *p++ = '\\', *p++ = 'n';
 908                         else if (c == '\r')
 909                                 *p++ = '\\', *p++ = 'r';
 910                         else if (c < ' ' || c >= 0x7f) {
 911                                 /* For performance, we don't want to call
 912                                    PyOS_snprintf here (extra layers of
 913                                    function call). */
 914                                 sprintf(p, "\\x%02x", c & 0xff);
 915                                 p += 4;
 916                         }
 917                         else
 918                                 *p++ = c;
 919                 }
 920                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 921                 *p++ = quote;
 922                 *p = '\0';
 923                 _PyString_Resize(
 924                         &v, (p - PyString_AS_STRING(v)));
 925                 return v;
 926         }
 927 }
 928
 929 static PyObject *
 930 string_repr(PyObject *op)
 931 {
 932         return PyString_Repr(op, 1);
 933 }
 934
 935 static PyObject *
 936 string_str(PyObject *s)
 937 {
 938         assert(PyString_Check(s));
 939         if (PyString_CheckExact(s)) {
 940                 Py_INCREF(s);
 941                 return s;
 942         }
 943         else {
 944                 /* Subtype -- return genuine string with the same value. */
 945                 PyStringObject *t = (PyStringObject *) s;
 946                 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
 947         }
 948 }
 949
 950 static Py_ssize_t
 951 string_length(PyStringObject *a)
 952 {
 953         return Py_SIZE(a);
 954 }
 955
 956 static PyObject *
 957 string_concat(register PyStringObject *a, register PyObject *bb)
 958 {
 959         register Py_ssize_t size;
 960         register PyStringObject *op;
 961         if (!PyString_Check(bb)) {
 962 #ifdef Py_USING_UNICODE
 963                 if (PyUnicode_Check(bb))
 964                     return PyUnicode_Concat((PyObject *)a, bb);
 965 #endif
 966                 if (PyByteArray_Check(bb))
 967                     return PyByteArray_Concat((PyObject *)a, bb);
 968                 PyErr_Format(PyExc_TypeError,
 969                              "cannot concatenate 'str' and '%.200s' objects",
 970                              Py_TYPE(bb)->tp_name);
 971                 return NULL;
 972         }
 973 #define b ((PyStringObject *)bb)
 974         /* Optimize cases with empty left or right operand */
 975         if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
 976             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 977                 if (Py_SIZE(a) == 0) {
 978                         Py_INCREF(bb);
 979                         return bb;
 980                 }
 981                 Py_INCREF(a);
 982                 return (PyObject *)a;
 983         }
 984         size = Py_SIZE(a) + Py_SIZE(b);
 985         /* Check that string sizes are not negative, to prevent an
 986            overflow in cases where we are passed incorrectly-created
 987            strings with negative lengths (due to a bug in other code).
 988         */
 989         if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
 990             Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
 991                 PyErr_SetString(PyExc_OverflowError,
 992                                 "strings are too large to concat");
 993                 return NULL;
 994         }
 995
 996         /* Inline PyObject_NewVar */
 997         if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
 998                 PyErr_SetString(PyExc_OverflowError,
 999                                 "strings are too large to concat");
1000                 return NULL;
1001         }
1002         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1003         if (op == NULL)
1004                 return PyErr_NoMemory();
1005         PyObject_INIT_VAR(op, &PyString_Type, size);
1006         op->ob_shash = -1;
1007         op->ob_sstate = SSTATE_NOT_INTERNED;
1008         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009         Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010         op->ob_sval[size] = '\0';
1011         return (PyObject *) op;
1012 #undef b
1013 }
1014
1015 static PyObject *
1016 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1017 {
1018         register Py_ssize_t i;
1019         register Py_ssize_t j;
1020         register Py_ssize_t size;
1021         register PyStringObject *op;
1022         size_t nbytes;
1023         if (n < 0)
1024                 n = 0;
1025         /* watch out for overflows:  the size can overflow int,
1026          * and the # of bytes needed can overflow size_t
1027          */
1028         size = Py_SIZE(a) * n;
1029         if (n && size / n != Py_SIZE(a)) {
1030                 PyErr_SetString(PyExc_OverflowError,
1031                         "repeated string is too long");
1032                 return NULL;
1033         }
1034         if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1035                 Py_INCREF(a);
1036                 return (PyObject *)a;
1037         }
1038         nbytes = (size_t)size;
1039         if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040                 PyErr_SetString(PyExc_OverflowError,
1041                         "repeated string is too long");
1042                 return NULL;
1043         }
1044         op = (PyStringObject *)
1045                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1046         if (op == NULL)
1047                 return PyErr_NoMemory();
1048         PyObject_INIT_VAR(op, &PyString_Type, size);
1049         op->ob_shash = -1;
1050         op->ob_sstate = SSTATE_NOT_INTERNED;
1051         op->ob_sval[size] = '\0';
1052         if (Py_SIZE(a) == 1 && n > 0) {
1053                 memset(op->ob_sval, a->ob_sval[0] , n);
1054                 return (PyObject *) op;
1055         }
1056         i = 0;
1057         if (i < size) {
1058                 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059                 i = Py_SIZE(a);
1060         }
1061         while (i < size) {
1062                 j = (i <= size-i)  ?  i  :  size-i;
1063                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064                 i += j;
1065         }
1066         return (PyObject *) op;
1067 }
1068
1069 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1070
1071 static PyObject *
1072 string_slice(register PyStringObject *a, register Py_ssize_t i,
1073              register Py_ssize_t j)
1074      /* j -- may be negative! */
1075 {
1076         if (i < 0)
1077                 i = 0;
1078         if (j < 0)
1079                 j = 0; /* Avoid signed/unsigned bug in next line */
1080         if (j > Py_SIZE(a))
1081                 j = Py_SIZE(a);
1082         if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083                 /* It's the same as a */
1084                 Py_INCREF(a);
1085                 return (PyObject *)a;
1086         }
1087         if (j < i)
1088                 j = i;
1089         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1090 }
1091
1092 static int
1093 string_contains(PyObject *str_obj, PyObject *sub_obj)
1094 {
1095         if (!PyString_CheckExact(sub_obj)) {
1096 #ifdef Py_USING_UNICODE
1097                 if (PyUnicode_Check(sub_obj))
1098                         return PyUnicode_Contains(str_obj, sub_obj);
1099 #endif
1100                 if (!PyString_Check(sub_obj)) {
1101                         PyErr_Format(PyExc_TypeError,
1102                             "'in <string>' requires string as left operand, "
1103                             "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104                         return -1;
1105                 }
1106         }
1107
1108         return stringlib_contains_obj(str_obj, sub_obj);
1109 }
1110
1111 static PyObject *
1112 string_item(PyStringObject *a, register Py_ssize_t i)
1113 {
1114         char pchar;
1115         PyObject *v;
1116         if (i < 0 || i >= Py_SIZE(a)) {
1117                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118                 return NULL;
1119         }
1120         pchar = a->ob_sval[i];
1121         v = (PyObject *)characters[pchar & UCHAR_MAX];
1122         if (v == NULL)
1123                 v = PyString_FromStringAndSize(&pchar, 1);
1124         else {
1125 #ifdef COUNT_ALLOCS
1126                 one_strings++;
1127 #endif
1128                 Py_INCREF(v);
1129         }
1130         return v;
1131 }
1132
1133 static PyObject*
1134 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1135 {
1136         int c;
1137         Py_ssize_t len_a, len_b;
1138         Py_ssize_t min_len;
1139         PyObject *result;
1140
1141         /* Make sure both arguments are strings. */
1142         if (!(PyString_Check(a) && PyString_Check(b))) {
1143                 result = Py_NotImplemented;
1144                 goto out;
1145         }
1146         if (a == b) {
1147                 switch (op) {
1148                 case Py_EQ:case Py_LE:case Py_GE:
1149                         result = Py_True;
1150                         goto out;
1151                 case Py_NE:case Py_LT:case Py_GT:
1152                         result = Py_False;
1153                         goto out;
1154                 }
1155         }
1156         if (op == Py_EQ) {
1157                 /* Supporting Py_NE here as well does not save
1158                    much time, since Py_NE is rarely used.  */
1159                 if (Py_SIZE(a) == Py_SIZE(b)
1160                     && (a->ob_sval[0] == b->ob_sval[0]
1161                         && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162                         result = Py_True;
1163                 } else {
1164                         result = Py_False;
1165                 }
1166                 goto out;
1167         }
1168         len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169         min_len = (len_a < len_b) ? len_a : len_b;
1170         if (min_len > 0) {
1171                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172                 if (c==0)
1173                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174         } else
1175                 c = 0;
1176         if (c == 0)
1177                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178         switch (op) {
1179         case Py_LT: c = c <  0; break;
1180         case Py_LE: c = c <= 0; break;
1181         case Py_EQ: assert(0);  break; /* unreachable */
1182         case Py_NE: c = c != 0; break;
1183         case Py_GT: c = c >  0; break;
1184         case Py_GE: c = c >= 0; break;
1185         default:
1186                 result = Py_NotImplemented;
1187                 goto out;
1188         }
1189         result = c ? Py_True : Py_False;
1190   out:
1191         Py_INCREF(result);
1192         return result;
1193 }
1194
1195 int
1196 _PyString_Eq(PyObject *o1, PyObject *o2)
1197 {
1198         PyStringObject *a = (PyStringObject*) o1;
1199         PyStringObject *b = (PyStringObject*) o2;
1200         return Py_SIZE(a) == Py_SIZE(b)
1201           && *a->ob_sval == *b->ob_sval
1202           && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1203 }
1204
1205 static long
1206 string_hash(PyStringObject *a)
1207 {
1208         register Py_ssize_t len;
1209         register unsigned char *p;
1210         register long x;
1211
1212         if (a->ob_shash != -1)
1213                 return a->ob_shash;
1214         len = Py_SIZE(a);
1215         p = (unsigned char *) a->ob_sval;
1216         x = *p << 7;
1217         while (--len >= 0)
1218                 x = (1000003*x) ^ *p++;
1219         x ^= Py_SIZE(a);
1220         if (x == -1)
1221                 x = -2;
1222         a->ob_shash = x;
1223         return x;
1224 }
1225
1226 static PyObject*
1227 string_subscript(PyStringObject* self, PyObject* item)
1228 {
1229         if (PyIndex_Check(item)) {
1230                 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1231                 if (i == -1 && PyErr_Occurred())
1232                         return NULL;
1233                 if (i < 0)
1234                         i += PyString_GET_SIZE(self);
1235                 return string_item(self, i);
1236         }
1237         else if (PySlice_Check(item)) {
1238                 Py_ssize_t start, stop, step, slicelength, cur, i;
1239                 char* source_buf;
1240                 char* result_buf;
1241                 PyObject* result;
1242
1243                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1244                                  PyString_GET_SIZE(self),
1245                                  &start, &stop, &step, &slicelength) < 0) {
1246                         return NULL;
1247                 }
1248
1249                 if (slicelength <= 0) {
1250                         return PyString_FromStringAndSize("", 0);
1251                 }
1252                 else if (start == 0 && step == 1 &&
1253                          slicelength == PyString_GET_SIZE(self) &&
1254                          PyString_CheckExact(self)) {
1255                         Py_INCREF(self);
1256                         return (PyObject *)self;
1257                 }
1258                 else if (step == 1) {
1259                         return PyString_FromStringAndSize(
1260                                 PyString_AS_STRING(self) + start,
1261                                 slicelength);
1262                 }
1263                 else {
1264                         source_buf = PyString_AsString((PyObject*)self);
1265                         result_buf = (char *)PyMem_Malloc(slicelength);
1266                         if (result_buf == NULL)
1267                                 return PyErr_NoMemory();
1268
1269                         for (cur = start, i = 0; i < slicelength;
1270                              cur += step, i++) {
1271                                 result_buf[i] = source_buf[cur];
1272                         }
1273
1274                         result = PyString_FromStringAndSize(result_buf,
1275                                                             slicelength);
1276                         PyMem_Free(result_buf);
1277                         return result;
1278                 }
1279         }
1280         else {
1281                 PyErr_Format(PyExc_TypeError,
1282                              "string indices must be integers, not %.200s",
1283                              Py_TYPE(item)->tp_name);
1284                 return NULL;
1285         }
1286 }
1287
1288 static Py_ssize_t
1289 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1290 {
1291         if ( index != 0 ) {
1292                 PyErr_SetString(PyExc_SystemError,
1293                                 "accessing non-existent string segment");
1294                 return -1;
1295         }
1296         *ptr = (void *)self->ob_sval;
1297         return Py_SIZE(self);
1298 }
1299
1300 static Py_ssize_t
1301 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1302 {
1303         PyErr_SetString(PyExc_TypeError,
1304                         "Cannot use string as modifiable buffer");
1305         return -1;
1306 }
1307
1308 static Py_ssize_t
1309 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1310 {
1311         if ( lenp )
1312                 *lenp = Py_SIZE(self);
1313         return 1;
1314 }
1315
1316 static Py_ssize_t
1317 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1318 {
1319         if ( index != 0 ) {
1320                 PyErr_SetString(PyExc_SystemError,
1321                                 "accessing non-existent string segment");
1322                 return -1;
1323         }
1324         *ptr = self->ob_sval;
1325         return Py_SIZE(self);
1326 }
1327
1328 static int
1329 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1330 {
1331         return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1332                                  0, flags);
1333 }
1334
1335 static PySequenceMethods string_as_sequence = {
1336         (lenfunc)string_length, /*sq_length*/
1337         (binaryfunc)string_concat, /*sq_concat*/
1338         (ssizeargfunc)string_repeat, /*sq_repeat*/
1339         (ssizeargfunc)string_item, /*sq_item*/
1340         (ssizessizeargfunc)string_slice, /*sq_slice*/
1341         0,              /*sq_ass_item*/
1342         0,              /*sq_ass_slice*/
1343         (objobjproc)string_contains /*sq_contains*/
1344 };
1345
1346 static PyMappingMethods string_as_mapping = {
1347         (lenfunc)string_length,
1348         (binaryfunc)string_subscript,
1349         0,
1350 };
1351
1352 static PyBufferProcs string_as_buffer = {
1353         (readbufferproc)string_buffer_getreadbuf,
1354         (writebufferproc)string_buffer_getwritebuf,
1355         (segcountproc)string_buffer_getsegcount,
1356         (charbufferproc)string_buffer_getcharbuf,
1357         (getbufferproc)string_buffer_getbuffer,
1358         0, /* XXX */
1359 };
1360
1361
1362 \f
/* Strip modes; each value is also an index into stripformat[] below. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Argument-format strings, indexed by the strip mode above. */
static const char *stripformat[] = {
        "|O:lstrip",
        "|O:rstrip",
        "|O:strip"
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1371
1372
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and memcmp() covers what lies between them.

   Don't call if length < 2.

   Every argument is parenthesized so that expressions such as `s + 1` or
   `cond ? a : b` expand correctly.  Arguments are still evaluated more
   than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1378
1379
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the number of splits, capped at MAX_PREALLOC.
   5 splits gives 6 elements.
   The argument is parenthesized so that an expression argument expands
   correctly; it is evaluated up to twice. */
#define PREALLOC_SIZE(maxsplit) \
        ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1392
/* Append the substring data[left:right] to `list`.
   Relies on the expansion site providing the variables `str` and `list`
   and the label `onError`, which is jumped to when creating the string or
   appending it fails.  The local reference to the new string is dropped
   in every case.  Expands to several statements, so it must not be used
   as the unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right)                         \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (PyList_Append(list, str) != 0) {                    \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        Py_DECREF(str);
1404
/* Add the substring data[left:right] as the next element of `list`.
   Relies on the expansion site providing the variables `str`, `list` and
   `count` and the label `onError`.  While `count` is below MAX_PREALLOC
   the new string is stored straight into slot `count`; from then on it is
   appended and the local reference dropped.  Jumps to onError when
   creating or appending the string fails.  Increments `count` on
   success. */
#define SPLIT_ADD(data, left, right) {                          \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (count >= MAX_PREALLOC) {                            \
                int append_failed = PyList_Append(list, str);   \
                Py_DECREF(str);                                 \
                if (append_failed)                              \
                        goto onError;                           \
        }                                                       \
        else {                                                  \
                PyList_SET_ITEM(list, count, str);              \
        }                                                       \
        count++; }
1421
/* Always force the list to the expected size: overwrite the size field of
   `list` with the `count` variable of the expansion site.  The assignment
   is wrapped in parentheses so the expansion stays a single expression. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
1424
/* Cursor helpers over the byte array `s`.  The forward forms advance `i`
   while it is below `len` and s[i] is (or is not) whitespace according to
   isspace(); the reverse forms step `i` downwards while it is >= 0.
   `i` must be a modifiable lvalue.  Arguments are parenthesized so that
   expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0  &&  isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0  && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1429
1430 Py_LOCAL_INLINE(PyObject *)
1431 split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1432 {
1433         const char *s = PyString_AS_STRING(self);
1434         Py_ssize_t i, j, count=0;
1435         PyObject *str;
1436         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1437
1438         if (list == NULL)
1439                 return NULL;
1440
1441         i = j = 0;
1442
1443         while (maxsplit-- > 0) {
1444                 SKIP_SPACE(s, i, len);
1445                 if (i==len) break;
1446                 j = i; i++;
1447                 SKIP_NONSPACE(s, i, len);
1448                 if (j == 0 && i == len && PyString_CheckExact(self)) {
1449                         /* No whitespace in self, so just use it as list[0] */
1450                         Py_INCREF(self);
1451                         PyList_SET_ITEM(list, 0, (PyObject *)self);
1452                         count++;
1453                         break;
1454                 }
1455                 SPLIT_ADD(s, j, i);
1456         }
1457
1458         if (i < len) {
1459                 /* Only occurs when maxsplit was reached */
1460                 /* Skip any remaining whitespace and copy to end of string */
1461                 SKIP_SPACE(s, i, len);
1462                 if (i != len)
1463                         SPLIT_ADD(s, i, len);
1464         }
1465         FIX_PREALLOC_SIZE(list);
1466         return list;
1467   onError:
1468         Py_DECREF(list);
1469         return NULL;
1470 }
1471
1472 Py_LOCAL_INLINE(PyObject *)
1473 split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1474 {
1475         const char *s = PyString_AS_STRING(self);
1476         register Py_ssize_t i, j, count=0;
1477         PyObject *str;
1478         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1479
1480         if (list == NULL)
1481                 return NULL;
1482
1483         i = j = 0;
1484         while ((j < len) && (maxcount-- > 0)) {
1485                 for(; j<len; j++) {
1486                         /* I found that using memchr makes no difference */
1487                         if (s[j] == ch) {
1488                                 SPLIT_ADD(s, i, j);
1489                                 i = j = j + 1;
1490                                 break;
1491                         }
1492                 }
1493         }
1494         if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1495                 /* ch not in self, so just use self as list[0] */
1496                 Py_INCREF(self);
1497                 PyList_SET_ITEM(list, 0, (PyObject *)self);
1498                 count++;
1499         }
1500         else if (i <= len) {
1501                 SPLIT_ADD(s, i, len);
1502         }
1503         FIX_PREALLOC_SIZE(list);
1504         return list;
1505
1506   onError:
1507         Py_DECREF(list);
1508         return NULL;
1509 }
1510
1511 PyDoc_STRVAR(split__doc__,
1512 "S.split([sep [,maxsplit]]) -> list of strings\n\
1513 \n\
1514 Return a list of the words in the string S, using sep as the\n\
1515 delimiter string.  If maxsplit is given, at most maxsplit\n\
1516 splits are done. If sep is not specified or is None, any\n\
1517 whitespace string is a separator and empty strings are removed\n\
1518 from the result.");
1519
1520 static PyObject *
1521 string_split(PyStringObject *self, PyObject *args)
1522 {
1523         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1524         Py_ssize_t maxsplit = -1, count=0;
1525         const char *s = PyString_AS_STRING(self), *sub;
1526         PyObject *list, *str, *subobj = Py_None;
1527 #ifdef USE_FAST
1528         Py_ssize_t pos;
1529 #endif
1530
1531         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1532                 return NULL;
1533         if (maxsplit < 0)
1534                 maxsplit = PY_SSIZE_T_MAX;
1535         if (subobj == Py_None)
1536                 return split_whitespace(self, len, maxsplit);
1537         if (PyString_Check(subobj)) {
1538                 sub = PyString_AS_STRING(subobj);
1539                 n = PyString_GET_SIZE(subobj);
1540         }
1541 #ifdef Py_USING_UNICODE
1542         else if (PyUnicode_Check(subobj))
1543                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1544 #endif
1545         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1546                 return NULL;
1547
1548         if (n == 0) {
1549                 PyErr_SetString(PyExc_ValueError, "empty separator");
1550                 return NULL;
1551         }
1552         else if (n == 1)
1553                 return split_char(self, len, sub[0], maxsplit);
1554
1555         list = PyList_New(PREALLOC_SIZE(maxsplit));
1556         if (list == NULL)
1557                 return NULL;
1558
1559 #ifdef USE_FAST
1560         i = j = 0;
1561         while (maxsplit-- > 0) {
1562                 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1563                 if (pos < 0)
1564                         break;
1565                 j = i+pos;
1566                 SPLIT_ADD(s, i, j);
1567                 i = j + n;
1568         }
1569 #else
1570         i = j = 0;
1571         while ((j+n <= len) && (maxsplit-- > 0)) {
1572                 for (; j+n <= len; j++) {
1573                         if (Py_STRING_MATCH(s, j, sub, n)) {
1574                                 SPLIT_ADD(s, i, j);
1575                                 i = j = j + n;
1576                                 break;
1577                         }
1578                 }
1579         }
1580 #endif
1581         SPLIT_ADD(s, i, len);
1582         FIX_PREALLOC_SIZE(list);
1583         return list;
1584
1585  onError:
1586         Py_DECREF(list);
1587         return NULL;
1588 }
1589
1590 PyDoc_STRVAR(partition__doc__,
1591 "S.partition(sep) -> (head, sep, tail)\n\
1592 \n\
1593 Searches for the separator sep in S, and returns the part before it,\n\
1594 the separator itself, and the part after it.  If the separator is not\n\
1595 found, returns S and two empty strings.");
1596
1597 static PyObject *
1598 string_partition(PyStringObject *self, PyObject *sep_obj)
1599 {
1600         const char *sep;
1601         Py_ssize_t sep_len;
1602
1603         if (PyString_Check(sep_obj)) {
1604                 sep = PyString_AS_STRING(sep_obj);
1605                 sep_len = PyString_GET_SIZE(sep_obj);
1606         }
1607 #ifdef Py_USING_UNICODE
1608         else if (PyUnicode_Check(sep_obj))
1609                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1610 #endif
1611         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1612                 return NULL;
1613
1614         return stringlib_partition(
1615                 (PyObject*) self,
1616                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1617                 sep_obj, sep, sep_len
1618                 );
1619 }
1620
1621 PyDoc_STRVAR(rpartition__doc__,
1622 "S.rpartition(sep) -> (tail, sep, head)\n\
1623 \n\
1624 Searches for the separator sep in S, starting at the end of S, and returns\n\
1625 the part before it, the separator itself, and the part after it.  If the\n\
1626 separator is not found, returns two empty strings and S.");
1627
1628 static PyObject *
1629 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1630 {
1631         const char *sep;
1632         Py_ssize_t sep_len;
1633
1634         if (PyString_Check(sep_obj)) {
1635                 sep = PyString_AS_STRING(sep_obj);
1636                 sep_len = PyString_GET_SIZE(sep_obj);
1637         }
1638 #ifdef Py_USING_UNICODE
1639         else if (PyUnicode_Check(sep_obj))
1640                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1641 #endif
1642         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1643                 return NULL;
1644
1645         return stringlib_rpartition(
1646                 (PyObject*) self,
1647                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1648                 sep_obj, sep, sep_len
1649                 );
1650 }
1651
1652 Py_LOCAL_INLINE(PyObject *)
1653 rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1654 {
1655         const char *s = PyString_AS_STRING(self);
1656         Py_ssize_t i, j, count=0;
1657         PyObject *str;
1658         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1659
1660         if (list == NULL)
1661                 return NULL;
1662
1663         i = j = len-1;
1664
1665         while (maxsplit-- > 0) {
1666                 RSKIP_SPACE(s, i);
1667                 if (i<0) break;
1668                 j = i; i--;
1669                 RSKIP_NONSPACE(s, i);
1670                 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1671                         /* No whitespace in self, so just use it as list[0] */
1672                         Py_INCREF(self);
1673                         PyList_SET_ITEM(list, 0, (PyObject *)self);
1674                         count++;
1675                         break;
1676                 }
1677                 SPLIT_ADD(s, i + 1, j + 1);
1678         }
1679         if (i >= 0) {
1680                 /* Only occurs when maxsplit was reached */
1681                 /* Skip any remaining whitespace and copy to beginning of string */
1682                 RSKIP_SPACE(s, i);
1683                 if (i >= 0)
1684                         SPLIT_ADD(s, 0, i + 1);
1685
1686         }
1687         FIX_PREALLOC_SIZE(list);
1688         if (PyList_Reverse(list) < 0)
1689                 goto onError;
1690         return list;
1691   onError:
1692         Py_DECREF(list);
1693         return NULL;
1694 }
1695
1696 Py_LOCAL_INLINE(PyObject *)
1697 rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1698 {
1699         const char *s = PyString_AS_STRING(self);
1700         register Py_ssize_t i, j, count=0;
1701         PyObject *str;
1702         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1703
1704         if (list == NULL)
1705                 return NULL;
1706
1707         i = j = len - 1;
1708         while ((i >= 0) && (maxcount-- > 0)) {
1709                 for (; i >= 0; i--) {
1710                         if (s[i] == ch) {
1711                                 SPLIT_ADD(s, i + 1, j + 1);
1712                                 j = i = i - 1;
1713                                 break;
1714                         }
1715                 }
1716         }
1717         if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1718                 /* ch not in self, so just use self as list[0] */
1719                 Py_INCREF(self);
1720                 PyList_SET_ITEM(list, 0, (PyObject *)self);
1721                 count++;
1722         }
1723         else if (j >= -1) {
1724                 SPLIT_ADD(s, 0, j + 1);
1725         }
1726         FIX_PREALLOC_SIZE(list);
1727         if (PyList_Reverse(list) < 0)
1728                 goto onError;
1729         return list;
1730
1731  onError:
1732         Py_DECREF(list);
1733         return NULL;
1734 }
1735
1736 PyDoc_STRVAR(rsplit__doc__,
1737 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1738 \n\
1739 Return a list of the words in the string S, using sep as the\n\
1740 delimiter string, starting at the end of the string and working\n\
1741 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1742 done. If sep is not specified or is None, any whitespace string\n\
1743 is a separator.");
1744
1745 static PyObject *
1746 string_rsplit(PyStringObject *self, PyObject *args)
1747 {
1748         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1749         Py_ssize_t maxsplit = -1, count=0;
1750         const char *s, *sub;
1751         PyObject *list, *str, *subobj = Py_None;
1752
1753         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1754                 return NULL;
1755         if (maxsplit < 0)
1756                 maxsplit = PY_SSIZE_T_MAX;
1757         if (subobj == Py_None)
1758                 return rsplit_whitespace(self, len, maxsplit);
1759         if (PyString_Check(subobj)) {
1760                 sub = PyString_AS_STRING(subobj);
1761                 n = PyString_GET_SIZE(subobj);
1762         }
1763 #ifdef Py_USING_UNICODE
1764         else if (PyUnicode_Check(subobj))
1765                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1766 #endif
1767         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1768                 return NULL;
1769
1770         if (n == 0) {
1771                 PyErr_SetString(PyExc_ValueError, "empty separator");
1772                 return NULL;
1773         }
1774         else if (n == 1)
1775                 return rsplit_char(self, len, sub[0], maxsplit);
1776
1777         list = PyList_New(PREALLOC_SIZE(maxsplit));
1778         if (list == NULL)
1779                 return NULL;
1780
1781         j = len;
1782         i = j - n;
1783
1784         s = PyString_AS_STRING(self);
1785         while ( (i >= 0) && (maxsplit-- > 0) ) {
1786                 for (; i>=0; i--) {
1787                         if (Py_STRING_MATCH(s, i, sub, n)) {
1788                                 SPLIT_ADD(s, i + n, j);
1789                                 j = i;
1790                                 i -= n;
1791                                 break;
1792                         }
1793                 }
1794         }
1795         SPLIT_ADD(s, 0, j);
1796         FIX_PREALLOC_SIZE(list);
1797         if (PyList_Reverse(list) < 0)
1798                 goto onError;
1799         return list;
1800
1801 onError:
1802         Py_DECREF(list);
1803         return NULL;
1804 }
1805
1806
1807 PyDoc_STRVAR(join__doc__,
1808 "S.join(sequence) -> string\n\
1809 \n\
1810 Return a string which is the concatenation of the strings in the\n\
1811 sequence.  The separator between elements is S.");
1812
1813 static PyObject *
1814 string_join(PyStringObject *self, PyObject *orig)
1815 {
1816         char *sep = PyString_AS_STRING(self);
1817         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1818         PyObject *res = NULL;
1819         char *p;
1820         Py_ssize_t seqlen = 0;
1821         size_t sz = 0;
1822         Py_ssize_t i;
1823         PyObject *seq, *item;
1824
1825         seq = PySequence_Fast(orig, "");
1826         if (seq == NULL) {
1827                 return NULL;
1828         }
1829
1830         seqlen = PySequence_Size(seq);
1831         if (seqlen == 0) {
1832                 Py_DECREF(seq);
1833                 return PyString_FromString("");
1834         }
1835         if (seqlen == 1) {
1836                 item = PySequence_Fast_GET_ITEM(seq, 0);
1837                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1838                         Py_INCREF(item);
1839                         Py_DECREF(seq);
1840                         return item;
1841                 }
1842         }
1843
1844         /* There are at least two things to join, or else we have a subclass
1845          * of the builtin types in the sequence.
1846          * Do a pre-pass to figure out the total amount of space we'll
1847          * need (sz), see whether any argument is absurd, and defer to
1848          * the Unicode join if appropriate.
1849          */
1850         for (i = 0; i < seqlen; i++) {
1851                 const size_t old_sz = sz;
1852                 item = PySequence_Fast_GET_ITEM(seq, i);
1853                 if (!PyString_Check(item)){
1854 #ifdef Py_USING_UNICODE
1855                         if (PyUnicode_Check(item)) {
1856                                 /* Defer to Unicode join.
1857                                  * CAUTION:  There's no gurantee that the
1858                                  * original sequence can be iterated over
1859                                  * again, so we must pass seq here.
1860                                  */
1861                                 PyObject *result;
1862                                 result = PyUnicode_Join((PyObject *)self, seq);
1863                                 Py_DECREF(seq);
1864                                 return result;
1865                         }
1866 #endif
1867                         PyErr_Format(PyExc_TypeError,
1868                                      "sequence item %zd: expected string,"
1869                                      " %.80s found",
1870                                      i, Py_TYPE(item)->tp_name);
1871                         Py_DECREF(seq);
1872                         return NULL;
1873                 }
1874                 sz += PyString_GET_SIZE(item);
1875                 if (i != 0)
1876                         sz += seplen;
1877                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1878                         PyErr_SetString(PyExc_OverflowError,
1879                                 "join() result is too long for a Python string");
1880                         Py_DECREF(seq);
1881                         return NULL;
1882                 }
1883         }
1884
1885         /* Allocate result space. */
1886         res = PyString_FromStringAndSize((char*)NULL, sz);
1887         if (res == NULL) {
1888                 Py_DECREF(seq);
1889                 return NULL;
1890         }
1891
1892         /* Catenate everything. */
1893         p = PyString_AS_STRING(res);
1894         for (i = 0; i < seqlen; ++i) {
1895                 size_t n;
1896                 item = PySequence_Fast_GET_ITEM(seq, i);
1897                 n = PyString_GET_SIZE(item);
1898                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1899                 p += n;
1900                 if (i < seqlen - 1) {
1901                         Py_MEMCPY(p, sep, seplen);
1902                         p += seplen;
1903                 }
1904         }
1905
1906         Py_DECREF(seq);
1907         return res;
1908 }
1909
1910 PyObject *
1911 _PyString_Join(PyObject *sep, PyObject *x)
1912 {
1913         assert(sep != NULL && PyString_Check(sep));
1914         assert(x != NULL);
1915         return string_join((PyStringObject *)sep, x);
1916 }
1917
1918 Py_LOCAL_INLINE(void)
1919 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1920 {
1921         if (*end > len)
1922                 *end = len;
1923         else if (*end < 0)
1924                 *end += len;
1925         if (*end < 0)
1926                 *end = 0;
1927         if (*start < 0)
1928                 *start += len;
1929         if (*start < 0)
1930                 *start = 0;
1931 }
1932
1933 Py_LOCAL_INLINE(Py_ssize_t)
1934 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1935 {
1936         PyObject *subobj;
1937         const char *sub;
1938         Py_ssize_t sub_len;
1939         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1940         PyObject *obj_start=Py_None, *obj_end=Py_None;
1941
1942         if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1943                 &obj_start, &obj_end))
1944                 return -2;
1945         /* To support None in "start" and "end" arguments, meaning
1946            the same as if they were not passed.
1947         */
1948         if (obj_start != Py_None)
1949                 if (!_PyEval_SliceIndex(obj_start, &start))
1950                 return -2;
1951         if (obj_end != Py_None)
1952                 if (!_PyEval_SliceIndex(obj_end, &end))
1953                 return -2;
1954
1955         if (PyString_Check(subobj)) {
1956                 sub = PyString_AS_STRING(subobj);
1957                 sub_len = PyString_GET_SIZE(subobj);
1958         }
1959 #ifdef Py_USING_UNICODE
1960         else if (PyUnicode_Check(subobj))
1961                 return PyUnicode_Find(
1962                         (PyObject *)self, subobj, start, end, dir);
1963 #endif
1964         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1965                 /* XXX - the "expected a character buffer object" is pretty
1966                    confusing for a non-expert.  remap to something else ? */
1967                 return -2;
1968
1969         if (dir > 0)
1970                 return stringlib_find_slice(
1971                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1972                         sub, sub_len, start, end);
1973         else
1974                 return stringlib_rfind_slice(
1975                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1976                         sub, sub_len, start, end);
1977 }
1978
1979
1980 PyDoc_STRVAR(find__doc__,
1981 "S.find(sub [,start [,end]]) -> int\n\
1982 \n\
1983 Return the lowest index in S where substring sub is found,\n\
1984 such that sub is contained within s[start:end].  Optional\n\
1985 arguments start and end are interpreted as in slice notation.\n\
1986 \n\
1987 Return -1 on failure.");
1988
1989 static PyObject *
1990 string_find(PyStringObject *self, PyObject *args)
1991 {
1992         Py_ssize_t result = string_find_internal(self, args, +1);
1993         if (result == -2)
1994                 return NULL;
1995         return PyInt_FromSsize_t(result);
1996 }
1997
1998
1999 PyDoc_STRVAR(index__doc__,
2000 "S.index(sub [,start [,end]]) -> int\n\
2001 \n\
2002 Like S.find() but raise ValueError when the substring is not found.");
2003
2004 static PyObject *
2005 string_index(PyStringObject *self, PyObject *args)
2006 {
2007         Py_ssize_t result = string_find_internal(self, args, +1);
2008         if (result == -2)
2009                 return NULL;
2010         if (result == -1) {
2011                 PyErr_SetString(PyExc_ValueError,
2012                                 "substring not found");
2013                 return NULL;
2014         }
2015         return PyInt_FromSsize_t(result);
2016 }
2017
2018
2019 PyDoc_STRVAR(rfind__doc__,
2020 "S.rfind(sub [,start [,end]]) -> int\n\
2021 \n\
2022 Return the highest index in S where substring sub is found,\n\
2023 such that sub is contained within s[start:end].  Optional\n\
2024 arguments start and end are interpreted as in slice notation.\n\
2025 \n\
2026 Return -1 on failure.");
2027
2028 static PyObject *
2029 string_rfind(PyStringObject *self, PyObject *args)
2030 {
2031         Py_ssize_t result = string_find_internal(self, args, -1);
2032         if (result == -2)
2033                 return NULL;
2034         return PyInt_FromSsize_t(result);
2035 }
2036
2037
2038 PyDoc_STRVAR(rindex__doc__,
2039 "S.rindex(sub [,start [,end]]) -> int\n\
2040 \n\
2041 Like S.rfind() but raise ValueError when the substring is not found.");
2042
2043 static PyObject *
2044 string_rindex(PyStringObject *self, PyObject *args)
2045 {
2046         Py_ssize_t result = string_find_internal(self, args, -1);
2047         if (result == -2)
2048                 return NULL;
2049         if (result == -1) {
2050                 PyErr_SetString(PyExc_ValueError,
2051                                 "substring not found");
2052                 return NULL;
2053         }
2054         return PyInt_FromSsize_t(result);
2055 }
2056
2057
2058 Py_LOCAL_INLINE(PyObject *)
2059 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2060 {
2061         char *s = PyString_AS_STRING(self);
2062         Py_ssize_t len = PyString_GET_SIZE(self);
2063         char *sep = PyString_AS_STRING(sepobj);
2064         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2065         Py_ssize_t i, j;
2066
2067         i = 0;
2068         if (striptype != RIGHTSTRIP) {
2069                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2070                         i++;
2071                 }
2072         }
2073
2074         j = len;
2075         if (striptype != LEFTSTRIP) {
2076                 do {
2077                         j--;
2078                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2079                 j++;
2080         }
2081
2082         if (i == 0 && j == len && PyString_CheckExact(self)) {
2083                 Py_INCREF(self);
2084                 return (PyObject*)self;
2085         }
2086         else
2087                 return PyString_FromStringAndSize(s+i, j-i);
2088 }
2089
2090
2091 Py_LOCAL_INLINE(PyObject *)
2092 do_strip(PyStringObject *self, int striptype)
2093 {
2094         char *s = PyString_AS_STRING(self);
2095         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2096
2097         i = 0;
2098         if (striptype != RIGHTSTRIP) {
2099                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2100                         i++;
2101                 }
2102         }
2103
2104         j = len;
2105         if (striptype != LEFTSTRIP) {
2106                 do {
2107                         j--;
2108                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2109                 j++;
2110         }
2111
2112         if (i == 0 && j == len && PyString_CheckExact(self)) {
2113                 Py_INCREF(self);
2114                 return (PyObject*)self;
2115         }
2116         else
2117                 return PyString_FromStringAndSize(s+i, j-i);
2118 }
2119
2120
2121 Py_LOCAL_INLINE(PyObject *)
2122 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2123 {
2124         PyObject *sep = NULL;
2125
2126         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2127                 return NULL;
2128
2129         if (sep != NULL && sep != Py_None) {
2130                 if (PyString_Check(sep))
2131                         return do_xstrip(self, striptype, sep);
2132 #ifdef Py_USING_UNICODE
2133                 else if (PyUnicode_Check(sep)) {
2134                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2135                         PyObject *res;
2136                         if (uniself==NULL)
2137                                 return NULL;
2138                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2139                                 striptype, sep);
2140                         Py_DECREF(uniself);
2141                         return res;
2142                 }
2143 #endif
2144                 PyErr_Format(PyExc_TypeError,
2145 #ifdef Py_USING_UNICODE
2146                              "%s arg must be None, str or unicode",
2147 #else
2148                              "%s arg must be None or str",
2149 #endif
2150                              STRIPNAME(striptype));
2151                 return NULL;
2152         }
2153
2154         return do_strip(self, striptype);
2155 }
2156
2157
2158 PyDoc_STRVAR(strip__doc__,
2159 "S.strip([chars]) -> string or unicode\n\
2160 \n\
2161 Return a copy of the string S with leading and trailing\n\
2162 whitespace removed.\n\
2163 If chars is given and not None, remove characters in chars instead.\n\
2164 If chars is unicode, S will be converted to unicode before stripping");
2165
2166 static PyObject *
2167 string_strip(PyStringObject *self, PyObject *args)
2168 {
2169         if (PyTuple_GET_SIZE(args) == 0)
2170                 return do_strip(self, BOTHSTRIP); /* Common case */
2171         else
2172                 return do_argstrip(self, BOTHSTRIP, args);
2173 }
2174
2175
2176 PyDoc_STRVAR(lstrip__doc__,
2177 "S.lstrip([chars]) -> string or unicode\n\
2178 \n\
2179 Return a copy of the string S with leading whitespace removed.\n\
2180 If chars is given and not None, remove characters in chars instead.\n\
2181 If chars is unicode, S will be converted to unicode before stripping");
2182
2183 static PyObject *
2184 string_lstrip(PyStringObject *self, PyObject *args)
2185 {
2186         if (PyTuple_GET_SIZE(args) == 0)
2187                 return do_strip(self, LEFTSTRIP); /* Common case */
2188         else
2189                 return do_argstrip(self, LEFTSTRIP, args);
2190 }
2191
2192
2193 PyDoc_STRVAR(rstrip__doc__,
2194 "S.rstrip([chars]) -> string or unicode\n\
2195 \n\
2196 Return a copy of the string S with trailing whitespace removed.\n\
2197 If chars is given and not None, remove characters in chars instead.\n\
2198 If chars is unicode, S will be converted to unicode before stripping");
2199
2200 static PyObject *
2201 string_rstrip(PyStringObject *self, PyObject *args)
2202 {
2203         if (PyTuple_GET_SIZE(args) == 0)
2204                 return do_strip(self, RIGHTSTRIP); /* Common case */
2205         else
2206                 return do_argstrip(self, RIGHTSTRIP, args);
2207 }
2208
2209
2210 PyDoc_STRVAR(lower__doc__,
2211 "S.lower() -> string\n\
2212 \n\
2213 Return a copy of the string S converted to lowercase.");
2214
2215 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2216 #ifndef _tolower
2217 #define _tolower tolower
2218 #endif
2219
2220 static PyObject *
2221 string_lower(PyStringObject *self)
2222 {
2223         char *s;
2224         Py_ssize_t i, n = PyString_GET_SIZE(self);
2225         PyObject *newobj;
2226
2227         newobj = PyString_FromStringAndSize(NULL, n);
2228         if (!newobj)
2229                 return NULL;
2230
2231         s = PyString_AS_STRING(newobj);
2232
2233         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2234
2235         for (i = 0; i < n; i++) {
2236                 int c = Py_CHARMASK(s[i]);
2237                 if (isupper(c))
2238                         s[i] = _tolower(c);
2239         }
2240
2241         return newobj;
2242 }
2243
2244 PyDoc_STRVAR(upper__doc__,
2245 "S.upper() -> string\n\
2246 \n\
2247 Return a copy of the string S converted to uppercase.");
2248
2249 #ifndef _toupper
2250 #define _toupper toupper
2251 #endif
2252
2253 static PyObject *
2254 string_upper(PyStringObject *self)
2255 {
2256         char *s;
2257         Py_ssize_t i, n = PyString_GET_SIZE(self);
2258         PyObject *newobj;
2259
2260         newobj = PyString_FromStringAndSize(NULL, n);
2261         if (!newobj)
2262                 return NULL;
2263
2264         s = PyString_AS_STRING(newobj);
2265
2266         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2267
2268         for (i = 0; i < n; i++) {
2269                 int c = Py_CHARMASK(s[i]);
2270                 if (islower(c))
2271                         s[i] = _toupper(c);
2272         }
2273
2274         return newobj;
2275 }
2276
2277 PyDoc_STRVAR(title__doc__,
2278 "S.title() -> string\n\
2279 \n\
2280 Return a titlecased version of S, i.e. words start with uppercase\n\
2281 characters, all remaining cased characters have lowercase.");
2282
2283 static PyObject*
2284 string_title(PyStringObject *self)
2285 {
2286         char *s = PyString_AS_STRING(self), *s_new;
2287         Py_ssize_t i, n = PyString_GET_SIZE(self);
2288         int previous_is_cased = 0;
2289         PyObject *newobj;
2290
2291         newobj = PyString_FromStringAndSize(NULL, n);
2292         if (newobj == NULL)
2293                 return NULL;
2294         s_new = PyString_AsString(newobj);
2295         for (i = 0; i < n; i++) {
2296                 int c = Py_CHARMASK(*s++);
2297                 if (islower(c)) {
2298                         if (!previous_is_cased)
2299                             c = toupper(c);
2300                         previous_is_cased = 1;
2301                 } else if (isupper(c)) {
2302                         if (previous_is_cased)
2303                             c = tolower(c);
2304                         previous_is_cased = 1;
2305                 } else
2306                         previous_is_cased = 0;
2307                 *s_new++ = c;
2308         }
2309         return newobj;
2310 }
2311
2312 PyDoc_STRVAR(capitalize__doc__,
2313 "S.capitalize() -> string\n\
2314 \n\
2315 Return a copy of the string S with only its first character\n\
2316 capitalized.");
2317
2318 static PyObject *
2319 string_capitalize(PyStringObject *self)
2320 {
2321         char *s = PyString_AS_STRING(self), *s_new;
2322         Py_ssize_t i, n = PyString_GET_SIZE(self);
2323         PyObject *newobj;
2324
2325         newobj = PyString_FromStringAndSize(NULL, n);
2326         if (newobj == NULL)
2327                 return NULL;
2328         s_new = PyString_AsString(newobj);
2329         if (0 < n) {
2330                 int c = Py_CHARMASK(*s++);
2331                 if (islower(c))
2332                         *s_new = toupper(c);
2333                 else
2334                         *s_new = c;
2335                 s_new++;
2336         }
2337         for (i = 1; i < n; i++) {
2338                 int c = Py_CHARMASK(*s++);
2339                 if (isupper(c))
2340                         *s_new = tolower(c);
2341                 else
2342                         *s_new = c;
2343                 s_new++;
2344         }
2345         return newobj;
2346 }
2347
2348
2349 PyDoc_STRVAR(count__doc__,
2350 "S.count(sub[, start[, end]]) -> int\n\
2351 \n\
2352 Return the number of non-overlapping occurrences of substring sub in\n\
2353 string S[start:end].  Optional arguments start and end are interpreted\n\
2354 as in slice notation.");
2355
2356 static PyObject *
2357 string_count(PyStringObject *self, PyObject *args)
2358 {
2359         PyObject *sub_obj;
2360         const char *str = PyString_AS_STRING(self), *sub;
2361         Py_ssize_t sub_len;
2362         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2363
2364         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2365                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2366                 return NULL;
2367
2368         if (PyString_Check(sub_obj)) {
2369                 sub = PyString_AS_STRING(sub_obj);
2370                 sub_len = PyString_GET_SIZE(sub_obj);
2371         }
2372 #ifdef Py_USING_UNICODE
2373         else if (PyUnicode_Check(sub_obj)) {
2374                 Py_ssize_t count;
2375                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2376                 if (count == -1)
2377                         return NULL;
2378                 else
2379                         return PyInt_FromSsize_t(count);
2380         }
2381 #endif
2382         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2383                 return NULL;
2384
2385         string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2386
2387         return PyInt_FromSsize_t(
2388                 stringlib_count(str + start, end - start, sub, sub_len)
2389                 );
2390 }
2391
2392 PyDoc_STRVAR(swapcase__doc__,
2393 "S.swapcase() -> string\n\
2394 \n\
2395 Return a copy of the string S with uppercase characters\n\
2396 converted to lowercase and vice versa.");
2397
2398 static PyObject *
2399 string_swapcase(PyStringObject *self)
2400 {
2401         char *s = PyString_AS_STRING(self), *s_new;
2402         Py_ssize_t i, n = PyString_GET_SIZE(self);
2403         PyObject *newobj;
2404
2405         newobj = PyString_FromStringAndSize(NULL, n);
2406         if (newobj == NULL)
2407                 return NULL;
2408         s_new = PyString_AsString(newobj);
2409         for (i = 0; i < n; i++) {
2410                 int c = Py_CHARMASK(*s++);
2411                 if (islower(c)) {
2412                         *s_new = toupper(c);
2413                 }
2414                 else if (isupper(c)) {
2415                         *s_new = tolower(c);
2416                 }
2417                 else
2418                         *s_new = c;
2419                 s_new++;
2420         }
2421         return newobj;
2422 }
2423
2424
2425 PyDoc_STRVAR(translate__doc__,
2426 "S.translate(table [,deletechars]) -> string\n\
2427 \n\
2428 Return a copy of the string S, where all characters occurring\n\
2429 in the optional argument deletechars are removed, and the\n\
2430 remaining characters have been mapped through the given\n\
2431 translation table, which must be a string of length 256.");
2432
2433 static PyObject *
2434 string_translate(PyStringObject *self, PyObject *args)
2435 {
2436         register char *input, *output;
2437         const char *table;
2438         register Py_ssize_t i, c, changed = 0;
2439         PyObject *input_obj = (PyObject*)self;
2440         const char *output_start, *del_table=NULL;
2441         Py_ssize_t inlen, tablen, dellen = 0;
2442         PyObject *result;
2443         int trans_table[256];
2444         PyObject *tableobj, *delobj = NULL;
2445
2446         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2447                               &tableobj, &delobj))
2448                 return NULL;
2449
2450         if (PyString_Check(tableobj)) {
2451                 table = PyString_AS_STRING(tableobj);
2452                 tablen = PyString_GET_SIZE(tableobj);
2453         }
2454         else if (tableobj == Py_None) {
2455                 table = NULL;
2456                 tablen = 256;
2457         }
2458 #ifdef Py_USING_UNICODE
2459         else if (PyUnicode_Check(tableobj)) {
2460                 /* Unicode .translate() does not support the deletechars
2461                    parameter; instead a mapping to None will cause characters
2462                    to be deleted. */
2463                 if (delobj != NULL) {
2464                         PyErr_SetString(PyExc_TypeError,
2465                         "deletions are implemented differently for unicode");
2466                         return NULL;
2467                 }
2468                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2469         }
2470 #endif
2471         else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2472                 return NULL;
2473
2474         if (tablen != 256) {
2475                 PyErr_SetString(PyExc_ValueError,
2476                   "translation table must be 256 characters long");
2477                 return NULL;
2478         }
2479
2480         if (delobj != NULL) {
2481                 if (PyString_Check(delobj)) {
2482                         del_table = PyString_AS_STRING(delobj);
2483                         dellen = PyString_GET_SIZE(delobj);
2484                 }
2485 #ifdef Py_USING_UNICODE
2486                 else if (PyUnicode_Check(delobj)) {
2487                         PyErr_SetString(PyExc_TypeError,
2488                         "deletions are implemented differently for unicode");
2489                         return NULL;
2490                 }
2491 #endif
2492                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2493                         return NULL;
2494         }
2495         else {
2496                 del_table = NULL;
2497                 dellen = 0;
2498         }
2499
2500         inlen = PyString_GET_SIZE(input_obj);
2501         result = PyString_FromStringAndSize((char *)NULL, inlen);
2502         if (result == NULL)
2503                 return NULL;
2504         output_start = output = PyString_AsString(result);
2505         input = PyString_AS_STRING(input_obj);
2506
2507         if (dellen == 0 && table != NULL) {
2508                 /* If no deletions are required, use faster code */
2509                 for (i = inlen; --i >= 0; ) {
2510                         c = Py_CHARMASK(*input++);
2511                         if (Py_CHARMASK((*output++ = table[c])) != c)
2512                                 changed = 1;
2513                 }
2514                 if (changed || !PyString_CheckExact(input_obj))
2515                         return result;
2516                 Py_DECREF(result);
2517                 Py_INCREF(input_obj);
2518                 return input_obj;
2519         }
2520
2521         if (table == NULL) {
2522                 for (i = 0; i < 256; i++)
2523                         trans_table[i] = Py_CHARMASK(i);
2524         } else {
2525                 for (i = 0; i < 256; i++)
2526                         trans_table[i] = Py_CHARMASK(table[i]);
2527         }
2528
2529         for (i = 0; i < dellen; i++)
2530                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2531
2532         for (i = inlen; --i >= 0; ) {
2533                 c = Py_CHARMASK(*input++);
2534                 if (trans_table[c] != -1)
2535                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2536                                 continue;
2537                 changed = 1;
2538         }
2539         if (!changed && PyString_CheckExact(input_obj)) {
2540                 Py_DECREF(result);
2541                 Py_INCREF(input_obj);
2542                 return input_obj;
2543         }
2544         /* Fix the size of the resulting string */
2545         if (inlen > 0)
2546                 _PyString_Resize(&result, output - output_start);
2547         return result;
2548 }
2549
2550
/* Values for the `direction' argument of findstring() and countstring():
   a positive value scans left to right, a negative one right to left.
   REVERSE is parenthesized so it stays a single operand wherever the
   macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c' within the first `target_len' bytes
   of `target'.  Evaluates to a char * pointing at that byte, or to NULL
   when no such byte exists.  Every argument is parenthesized so that
   arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2558
2559 /* String ops must return a string.  */
2560 /* If the object is subclass of string, create a copy */
2561 Py_LOCAL(PyStringObject *)
2562 return_self(PyStringObject *self)
2563 {
2564         if (PyString_CheckExact(self)) {
2565                 Py_INCREF(self);
2566                 return self;
2567         }
2568         return (PyStringObject *)PyString_FromStringAndSize(
2569                 PyString_AS_STRING(self),
2570                 PyString_GET_SIZE(self));
2571 }
2572
2573 Py_LOCAL_INLINE(Py_ssize_t)
2574 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2575 {
2576         Py_ssize_t count=0;
2577         const char *start=target;
2578         const char *end=target+target_len;
2579
2580         while ( (start=findchar(start, end-start, c)) != NULL ) {
2581                 count++;
2582                 if (count >= maxcount)
2583                         break;
2584                 start += 1;
2585         }
2586         return count;
2587 }
2588
2589 Py_LOCAL(Py_ssize_t)
2590 findstring(const char *target, Py_ssize_t target_len,
2591            const char *pattern, Py_ssize_t pattern_len,
2592            Py_ssize_t start,
2593            Py_ssize_t end,
2594            int direction)
2595 {
2596         if (start < 0) {
2597                 start += target_len;
2598                 if (start < 0)
2599                         start = 0;
2600         }
2601         if (end > target_len) {
2602                 end = target_len;
2603         } else if (end < 0) {
2604                 end += target_len;
2605                 if (end < 0)
2606                         end = 0;
2607         }
2608
2609         /* zero-length substrings always match at the first attempt */
2610         if (pattern_len == 0)
2611                 return (direction > 0) ? start : end;
2612
2613         end -= pattern_len;
2614
2615         if (direction < 0) {
2616                 for (; end >= start; end--)
2617                         if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2618                                 return end;
2619         } else {
2620                 for (; start <= end; start++)
2621                         if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2622                                 return start;
2623         }
2624         return -1;
2625 }
2626
2627 Py_LOCAL_INLINE(Py_ssize_t)
2628 countstring(const char *target, Py_ssize_t target_len,
2629             const char *pattern, Py_ssize_t pattern_len,
2630             Py_ssize_t start,
2631             Py_ssize_t end,
2632             int direction, Py_ssize_t maxcount)
2633 {
2634         Py_ssize_t count=0;
2635
2636         if (start < 0) {
2637                 start += target_len;
2638                 if (start < 0)
2639                         start = 0;
2640         }
2641         if (end > target_len) {
2642                 end = target_len;
2643         } else if (end < 0) {
2644                 end += target_len;
2645                 if (end < 0)
2646                         end = 0;
2647         }
2648
2649         /* zero-length substrings match everywhere */
2650         if (pattern_len == 0 || maxcount == 0) {
2651                 if (target_len+1 < maxcount)
2652                         return target_len+1;
2653                 return maxcount;
2654         }
2655
2656         end -= pattern_len;
2657         if (direction < 0) {
2658                 for (; (end >= start); end--)
2659                         if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2660                                 count++;
2661                                 if (--maxcount <= 0) break;
2662                                 end -= pattern_len-1;
2663                         }
2664         } else {
2665                 for (; (start <= end); start++)
2666                         if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2667                                 count++;
2668                                 if (--maxcount <= 0)
2669                                         break;
2670                                 start += pattern_len-1;
2671                         }
2672         }
2673         return count;
2674 }
2675
2676
2677 /* Algorithms for different cases of string replacement */
2678
2679 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2680 Py_LOCAL(PyStringObject *)
2681 replace_interleave(PyStringObject *self,
2682                    const char *to_s, Py_ssize_t to_len,
2683                    Py_ssize_t maxcount)
2684 {
2685         char *self_s, *result_s;
2686         Py_ssize_t self_len, result_len;
2687         Py_ssize_t count, i, product;
2688         PyStringObject *result;
2689
2690         self_len = PyString_GET_SIZE(self);
2691
2692         /* 1 at the end plus 1 after every character */
2693         count = self_len+1;
2694         if (maxcount < count)
2695                 count = maxcount;
2696
2697         /* Check for overflow */
2698         /*   result_len = count * to_len + self_len; */
2699         product = count * to_len;
2700         if (product / to_len != count) {
2701                 PyErr_SetString(PyExc_OverflowError,
2702                                 "replace string is too long");
2703                 return NULL;
2704         }
2705         result_len = product + self_len;
2706         if (result_len < 0) {
2707                 PyErr_SetString(PyExc_OverflowError,
2708                                 "replace string is too long");
2709                 return NULL;
2710         }
2711
2712         if (! (result = (PyStringObject *)
2713                          PyString_FromStringAndSize(NULL, result_len)) )
2714                 return NULL;
2715
2716         self_s = PyString_AS_STRING(self);
2717         result_s = PyString_AS_STRING(result);
2718
2719         /* TODO: special case single character, which doesn't need memcpy */
2720
2721         /* Lay the first one down (guaranteed this will occur) */
2722         Py_MEMCPY(result_s, to_s, to_len);
2723         result_s += to_len;
2724         count -= 1;
2725
2726         for (i=0; i<count; i++) {
2727                 *result_s++ = *self_s++;
2728                 Py_MEMCPY(result_s, to_s, to_len);
2729                 result_s += to_len;
2730         }
2731
2732         /* Copy the rest of the original string */
2733         Py_MEMCPY(result_s, self_s, self_len-i);
2734
2735         return result;
2736 }
2737
2738 /* Special case for deleting a single character */
2739 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2740 Py_LOCAL(PyStringObject *)
2741 replace_delete_single_character(PyStringObject *self,
2742                                 char from_c, Py_ssize_t maxcount)
2743 {
2744         char *self_s, *result_s;
2745         char *start, *next, *end;
2746         Py_ssize_t self_len, result_len;
2747         Py_ssize_t count;
2748         PyStringObject *result;
2749
2750         self_len = PyString_GET_SIZE(self);
2751         self_s = PyString_AS_STRING(self);
2752
2753         count = countchar(self_s, self_len, from_c, maxcount);
2754         if (count == 0) {
2755                 return return_self(self);
2756         }
2757
2758         result_len = self_len - count;  /* from_len == 1 */
2759         assert(result_len>=0);
2760
2761         if ( (result = (PyStringObject *)
2762                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2763                 return NULL;
2764         result_s = PyString_AS_STRING(result);
2765
2766         start = self_s;
2767         end = self_s + self_len;
2768         while (count-- > 0) {
2769                 next = findchar(start, end-start, from_c);
2770                 if (next == NULL)
2771                         break;
2772                 Py_MEMCPY(result_s, start, next-start);
2773                 result_s += (next-start);
2774                 start = next+1;
2775         }
2776         Py_MEMCPY(result_s, start, end-start);
2777
2778         return result;
2779 }
2780
2781 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2782
2783 Py_LOCAL(PyStringObject *)
2784 replace_delete_substring(PyStringObject *self,
2785                          const char *from_s, Py_ssize_t from_len,
2786                          Py_ssize_t maxcount) {
2787         char *self_s, *result_s;
2788         char *start, *next, *end;
2789         Py_ssize_t self_len, result_len;
2790         Py_ssize_t count, offset;
2791         PyStringObject *result;
2792
2793         self_len = PyString_GET_SIZE(self);
2794         self_s = PyString_AS_STRING(self);
2795
2796         count = countstring(self_s, self_len,
2797                             from_s, from_len,
2798                             0, self_len, 1,
2799                             maxcount);
2800
2801         if (count == 0) {
2802                 /* no matches */
2803                 return return_self(self);
2804         }
2805
2806         result_len = self_len - (count * from_len);
2807         assert (result_len>=0);
2808
2809         if ( (result = (PyStringObject *)
2810               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2811                 return NULL;
2812
2813         result_s = PyString_AS_STRING(result);
2814
2815         start = self_s;
2816         end = self_s + self_len;
2817         while (count-- > 0) {
2818                 offset = findstring(start, end-start,
2819                                     from_s, from_len,
2820                                     0, end-start, FORWARD);
2821                 if (offset == -1)
2822                         break;
2823                 next = start + offset;
2824
2825                 Py_MEMCPY(result_s, start, next-start);
2826
2827                 result_s += (next-start);
2828                 start = next+from_len;
2829         }
2830         Py_MEMCPY(result_s, start, end-start);
2831         return result;
2832 }
2833
2834 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2835 Py_LOCAL(PyStringObject *)
2836 replace_single_character_in_place(PyStringObject *self,
2837                                   char from_c, char to_c,
2838                                   Py_ssize_t maxcount)
2839 {
2840         char *self_s, *result_s, *start, *end, *next;
2841         Py_ssize_t self_len;
2842         PyStringObject *result;
2843
2844         /* The result string will be the same size */
2845         self_s = PyString_AS_STRING(self);
2846         self_len = PyString_GET_SIZE(self);
2847
2848         next = findchar(self_s, self_len, from_c);
2849
2850         if (next == NULL) {
2851                 /* No matches; return the original string */
2852                 return return_self(self);
2853         }
2854
2855         /* Need to make a new string */
2856         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2857         if (result == NULL)
2858                 return NULL;
2859         result_s = PyString_AS_STRING(result);
2860         Py_MEMCPY(result_s, self_s, self_len);
2861
2862         /* change everything in-place, starting with this one */
2863         start =  result_s + (next-self_s);
2864         *start = to_c;
2865         start++;
2866         end = result_s + self_len;
2867
2868         while (--maxcount > 0) {
2869                 next = findchar(start, end-start, from_c);
2870                 if (next == NULL)
2871                         break;
2872                 *next = to_c;
2873                 start = next+1;
2874         }
2875
2876         return result;
2877 }
2878
2879 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2880 Py_LOCAL(PyStringObject *)
2881 replace_substring_in_place(PyStringObject *self,
2882                            const char *from_s, Py_ssize_t from_len,
2883                            const char *to_s, Py_ssize_t to_len,
2884                            Py_ssize_t maxcount)
2885 {
2886         char *result_s, *start, *end;
2887         char *self_s;
2888         Py_ssize_t self_len, offset;
2889         PyStringObject *result;
2890
2891         /* The result string will be the same size */
2892
2893         self_s = PyString_AS_STRING(self);
2894         self_len = PyString_GET_SIZE(self);
2895
2896         offset = findstring(self_s, self_len,
2897                             from_s, from_len,
2898                             0, self_len, FORWARD);
2899         if (offset == -1) {
2900                 /* No matches; return the original string */
2901                 return return_self(self);
2902         }
2903
2904         /* Need to make a new string */
2905         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2906         if (result == NULL)
2907                 return NULL;
2908         result_s = PyString_AS_STRING(result);
2909         Py_MEMCPY(result_s, self_s, self_len);
2910
2911         /* change everything in-place, starting with this one */
2912         start =  result_s + offset;
2913         Py_MEMCPY(start, to_s, from_len);
2914         start += from_len;
2915         end = result_s + self_len;
2916
2917         while ( --maxcount > 0) {
2918                 offset = findstring(start, end-start,
2919                                     from_s, from_len,
2920                                     0, end-start, FORWARD);
2921                 if (offset==-1)
2922                         break;
2923                 Py_MEMCPY(start+offset, to_s, from_len);
2924                 start += offset+from_len;
2925         }
2926
2927         return result;
2928 }
2929
2930 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2931 Py_LOCAL(PyStringObject *)
2932 replace_single_character(PyStringObject *self,
2933                          char from_c,
2934                          const char *to_s, Py_ssize_t to_len,
2935                          Py_ssize_t maxcount)
2936 {
2937         char *self_s, *result_s;
2938         char *start, *next, *end;
2939         Py_ssize_t self_len, result_len;
2940         Py_ssize_t count, product;
2941         PyStringObject *result;
2942
2943         self_s = PyString_AS_STRING(self);
2944         self_len = PyString_GET_SIZE(self);
2945
2946         count = countchar(self_s, self_len, from_c, maxcount);
2947         if (count == 0) {
2948                 /* no matches, return unchanged */
2949                 return return_self(self);
2950         }
2951
2952         /* use the difference between current and new, hence the "-1" */
2953         /*   result_len = self_len + count * (to_len-1)  */
2954         product = count * (to_len-1);
2955         if (product / (to_len-1) != count) {
2956                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2957                 return NULL;
2958         }
2959         result_len = self_len + product;
2960         if (result_len < 0) {
2961                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2962                 return NULL;
2963         }
2964
2965         if ( (result = (PyStringObject *)
2966               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2967                 return NULL;
2968         result_s = PyString_AS_STRING(result);
2969
2970         start = self_s;
2971         end = self_s + self_len;
2972         while (count-- > 0) {
2973                 next = findchar(start, end-start, from_c);
2974                 if (next == NULL)
2975                         break;
2976
2977                 if (next == start) {
2978                         /* replace with the 'to' */
2979                         Py_MEMCPY(result_s, to_s, to_len);
2980                         result_s += to_len;
2981                         start += 1;
2982                 } else {
2983                         /* copy the unchanged old then the 'to' */
2984                         Py_MEMCPY(result_s, start, next-start);
2985                         result_s += (next-start);
2986                         Py_MEMCPY(result_s, to_s, to_len);
2987                         result_s += to_len;
2988                         start = next+1;
2989                 }
2990         }
2991         /* Copy the remainder of the remaining string */
2992         Py_MEMCPY(result_s, start, end-start);
2993
2994         return result;
2995 }
2996
2997 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2998 Py_LOCAL(PyStringObject *)
2999 replace_substring(PyStringObject *self,
3000                   const char *from_s, Py_ssize_t from_len,
3001                   const char *to_s, Py_ssize_t to_len,
3002                   Py_ssize_t maxcount) {
3003         char *self_s, *result_s;
3004         char *start, *next, *end;
3005         Py_ssize_t self_len, result_len;
3006         Py_ssize_t count, offset, product;
3007         PyStringObject *result;
3008
3009         self_s = PyString_AS_STRING(self);
3010         self_len = PyString_GET_SIZE(self);
3011
3012         count = countstring(self_s, self_len,
3013                             from_s, from_len,
3014                             0, self_len, FORWARD, maxcount);
3015         if (count == 0) {
3016                 /* no matches, return unchanged */
3017                 return return_self(self);
3018         }
3019
3020         /* Check for overflow */
3021         /*    result_len = self_len + count * (to_len-from_len) */
3022         product = count * (to_len-from_len);
3023         if (product / (to_len-from_len) != count) {
3024                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3025                 return NULL;
3026         }
3027         result_len = self_len + product;
3028         if (result_len < 0) {
3029                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3030                 return NULL;
3031         }
3032
3033         if ( (result = (PyStringObject *)
3034               PyString_FromStringAndSize(NULL, result_len)) == NULL)
3035                 return NULL;
3036         result_s = PyString_AS_STRING(result);
3037
3038         start = self_s;
3039         end = self_s + self_len;
3040         while (count-- > 0) {
3041                 offset = findstring(start, end-start,
3042                                     from_s, from_len,
3043                                     0, end-start, FORWARD);
3044                 if (offset == -1)
3045                         break;
3046                 next = start+offset;
3047                 if (next == start) {
3048                         /* replace with the 'to' */
3049                         Py_MEMCPY(result_s, to_s, to_len);
3050                         result_s += to_len;
3051                         start += from_len;
3052                 } else {
3053                         /* copy the unchanged old then the 'to' */
3054                         Py_MEMCPY(result_s, start, next-start);
3055                         result_s += (next-start);
3056                         Py_MEMCPY(result_s, to_s, to_len);
3057                         result_s += to_len;
3058                         start = next+from_len;
3059                 }
3060         }
3061         /* Copy the remainder of the remaining string */
3062         Py_MEMCPY(result_s, start, end-start);
3063
3064         return result;
3065 }
3066
3067
3068 Py_LOCAL(PyStringObject *)
3069 replace(PyStringObject *self,
3070         const char *from_s, Py_ssize_t from_len,
3071         const char *to_s, Py_ssize_t to_len,
3072         Py_ssize_t maxcount)
3073 {
3074         if (maxcount < 0) {
3075                 maxcount = PY_SSIZE_T_MAX;
3076         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3077                 /* nothing to do; return the original string */
3078                 return return_self(self);
3079         }
3080
3081         if (maxcount == 0 ||
3082             (from_len == 0 && to_len == 0)) {
3083                 /* nothing to do; return the original string */
3084                 return return_self(self);
3085         }
3086
3087         /* Handle zero-length special cases */
3088
3089         if (from_len == 0) {
3090                 /* insert the 'to' string everywhere.   */
3091                 /*    >>> "Python".replace("", ".")     */
3092                 /*    '.P.y.t.h.o.n.'                   */
3093                 return replace_interleave(self, to_s, to_len, maxcount);
3094         }
3095
3096         /* Except for "".replace("", "A") == "A" there is no way beyond this */
3097         /* point for an empty self string to generate a non-empty string */
3098         /* Special case so the remaining code always gets a non-empty string */
3099         if (PyString_GET_SIZE(self) == 0) {
3100                 return return_self(self);
3101         }
3102
3103         if (to_len == 0) {
3104                 /* delete all occurances of 'from' string */
3105                 if (from_len == 1) {
3106                         return replace_delete_single_character(
3107                                 self, from_s[0], maxcount);
3108                 } else {
3109                         return replace_delete_substring(self, from_s, from_len, maxcount);
3110                 }
3111         }
3112
3113         /* Handle special case where both strings have the same length */
3114
3115         if (from_len == to_len) {
3116                 if (from_len == 1) {
3117                         return replace_single_character_in_place(
3118                                 self,
3119                                 from_s[0],
3120                                 to_s[0],
3121                                 maxcount);
3122                 } else {
3123                         return replace_substring_in_place(
3124                                 self, from_s, from_len, to_s, to_len, maxcount);
3125                 }
3126         }
3127
3128         /* Otherwise use the more generic algorithms */
3129         if (from_len == 1) {
3130                 return replace_single_character(self, from_s[0],
3131                                                 to_s, to_len, maxcount);
3132         } else {
3133                 /* len('from')>=2, len('to')>=1 */
3134                 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3135         }
3136 }
3137
3138 PyDoc_STRVAR(replace__doc__,
3139 "S.replace (old, new[, count]) -> string\n\
3140 \n\
3141 Return a copy of string S with all occurrences of substring\n\
3142 old replaced by new.  If the optional argument count is\n\
3143 given, only the first count occurrences are replaced.");
3144
3145 static PyObject *
3146 string_replace(PyStringObject *self, PyObject *args)
3147 {
3148         Py_ssize_t count = -1;
3149         PyObject *from, *to;
3150         const char *from_s, *to_s;
3151         Py_ssize_t from_len, to_len;
3152
3153         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3154                 return NULL;
3155
3156         if (PyString_Check(from)) {
3157                 from_s = PyString_AS_STRING(from);
3158                 from_len = PyString_GET_SIZE(from);
3159         }
3160 #ifdef Py_USING_UNICODE
3161         if (PyUnicode_Check(from))
3162                 return PyUnicode_Replace((PyObject *)self,
3163                                          from, to, count);
3164 #endif
3165         else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3166                 return NULL;
3167
3168         if (PyString_Check(to)) {
3169                 to_s = PyString_AS_STRING(to);
3170                 to_len = PyString_GET_SIZE(to);
3171         }
3172 #ifdef Py_USING_UNICODE
3173         else if (PyUnicode_Check(to))
3174                 return PyUnicode_Replace((PyObject *)self,
3175                                          from, to, count);
3176 #endif
3177         else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3178                 return NULL;
3179
3180         return (PyObject *)replace((PyStringObject *) self,
3181                                    from_s, from_len,
3182                                    to_s, to_len, count);
3183 }
3184
3185 /** End DALKE **/
3186
3187 /* Matches the end (direction >= 0) or start (direction < 0) of self
3188  * against substr, using the start and end arguments. Returns
3189  * -1 on error, 0 if not found and 1 if found.
3190  */
3191 Py_LOCAL(int)
3192 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3193                   Py_ssize_t end, int direction)
3194 {
3195         Py_ssize_t len = PyString_GET_SIZE(self);
3196         Py_ssize_t slen;
3197         const char* sub;
3198         const char* str;
3199
3200         if (PyString_Check(substr)) {
3201                 sub = PyString_AS_STRING(substr);
3202                 slen = PyString_GET_SIZE(substr);
3203         }
3204 #ifdef Py_USING_UNICODE
3205         else if (PyUnicode_Check(substr))
3206                 return PyUnicode_Tailmatch((PyObject *)self,
3207                                            substr, start, end, direction);
3208 #endif
3209         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3210                 return -1;
3211         str = PyString_AS_STRING(self);
3212
3213         string_adjust_indices(&start, &end, len);
3214
3215         if (direction < 0) {
3216                 /* startswith */
3217                 if (start+slen > len)
3218                         return 0;
3219         } else {
3220                 /* endswith */
3221                 if (end-start < slen || start > len)
3222                         return 0;
3223
3224                 if (end-slen > start)
3225                         start = end - slen;
3226         }
3227         if (end-start >= slen)
3228                 return ! memcmp(str+start, sub, slen);
3229         return 0;
3230 }
3231
3232
3233 PyDoc_STRVAR(startswith__doc__,
3234 "S.startswith(prefix[, start[, end]]) -> bool\n\
3235 \n\
3236 Return True if S starts with the specified prefix, False otherwise.\n\
3237 With optional start, test S beginning at that position.\n\
3238 With optional end, stop comparing S at that position.\n\
3239 prefix can also be a tuple of strings to try.");
3240
3241 static PyObject *
3242 string_startswith(PyStringObject *self, PyObject *args)
3243 {
3244         Py_ssize_t start = 0;
3245         Py_ssize_t end = PY_SSIZE_T_MAX;
3246         PyObject *subobj;
3247         int result;
3248
3249         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3250                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3251                 return NULL;
3252         if (PyTuple_Check(subobj)) {
3253                 Py_ssize_t i;
3254                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3255                         result = _string_tailmatch(self,
3256                                         PyTuple_GET_ITEM(subobj, i),
3257                                         start, end, -1);
3258                         if (result == -1)
3259                                 return NULL;
3260                         else if (result) {
3261                                 Py_RETURN_TRUE;
3262                         }
3263                 }
3264                 Py_RETURN_FALSE;
3265         }
3266         result = _string_tailmatch(self, subobj, start, end, -1);
3267         if (result == -1)
3268                 return NULL;
3269         else
3270                 return PyBool_FromLong(result);
3271 }
3272
3273
3274 PyDoc_STRVAR(endswith__doc__,
3275 "S.endswith(suffix[, start[, end]]) -> bool\n\
3276 \n\
3277 Return True if S ends with the specified suffix, False otherwise.\n\
3278 With optional start, test S beginning at that position.\n\
3279 With optional end, stop comparing S at that position.\n\
3280 suffix can also be a tuple of strings to try.");
3281
3282 static PyObject *
3283 string_endswith(PyStringObject *self, PyObject *args)
3284 {
3285         Py_ssize_t start = 0;
3286         Py_ssize_t end = PY_SSIZE_T_MAX;
3287         PyObject *subobj;
3288         int result;
3289
3290         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3291                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3292                 return NULL;
3293         if (PyTuple_Check(subobj)) {
3294                 Py_ssize_t i;
3295                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3296                         result = _string_tailmatch(self,
3297                                         PyTuple_GET_ITEM(subobj, i),
3298                                         start, end, +1);
3299                         if (result == -1)
3300                                 return NULL;
3301                         else if (result) {
3302                                 Py_RETURN_TRUE;
3303                         }
3304                 }
3305                 Py_RETURN_FALSE;
3306         }
3307         result = _string_tailmatch(self, subobj, start, end, +1);
3308         if (result == -1)
3309                 return NULL;
3310         else
3311                 return PyBool_FromLong(result);
3312 }
3313
3314
3315 PyDoc_STRVAR(encode__doc__,
3316 "S.encode([encoding[,errors]]) -> object\n\
3317 \n\
3318 Encodes S using the codec registered for encoding. encoding defaults\n\
3319 to the default encoding. errors may be given to set a different error\n\
3320 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3321 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3322 'xmlcharrefreplace' as well as any other name registered with\n\
3323 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3324
3325 static PyObject *
3326 string_encode(PyStringObject *self, PyObject *args)
3327 {
3328     char *encoding = NULL;
3329     char *errors = NULL;
3330     PyObject *v;
3331
3332     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3333         return NULL;
3334     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3335     if (v == NULL)
3336         goto onError;
3337     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3338         PyErr_Format(PyExc_TypeError,
3339                      "encoder did not return a string/unicode object "
3340                      "(type=%.400s)",
3341                      Py_TYPE(v)->tp_name);
3342         Py_DECREF(v);
3343         return NULL;
3344     }
3345     return v;
3346
3347  onError:
3348     return NULL;
3349 }
3350
3351
3352 PyDoc_STRVAR(decode__doc__,
3353 "S.decode([encoding[,errors]]) -> object\n\
3354 \n\
3355 Decodes S using the codec registered for encoding. encoding defaults\n\
3356 to the default encoding. errors may be given to set a different error\n\
3357 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3358 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3359 as well as any other name registerd with codecs.register_error that is\n\
3360 able to handle UnicodeDecodeErrors.");
3361
3362 static PyObject *
3363 string_decode(PyStringObject *self, PyObject *args)
3364 {
3365     char *encoding = NULL;
3366     char *errors = NULL;
3367     PyObject *v;
3368
3369     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3370         return NULL;
3371     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3372     if (v == NULL)
3373         goto onError;
3374     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3375         PyErr_Format(PyExc_TypeError,
3376                      "decoder did not return a string/unicode object "
3377                      "(type=%.400s)",
3378                      Py_TYPE(v)->tp_name);
3379         Py_DECREF(v);
3380         return NULL;
3381     }
3382     return v;
3383
3384  onError:
3385     return NULL;
3386 }
3387
3388
3389 PyDoc_STRVAR(expandtabs__doc__,
3390 "S.expandtabs([tabsize]) -> string\n\
3391 \n\
3392 Return a copy of S where all tab characters are expanded using spaces.\n\
3393 If tabsize is not given, a tab size of 8 characters is assumed.");
3394
3395 static PyObject*
3396 string_expandtabs(PyStringObject *self, PyObject *args)
3397 {
3398     const char *e, *p, *qe;
3399     char *q;
3400     Py_ssize_t i, j, incr;
3401     PyObject *u;
3402     int tabsize = 8;
3403
3404     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3405         return NULL;
3406
3407     /* First pass: determine size of output string */
3408     i = 0; /* chars up to and including most recent \n or \r */
3409     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3410     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3411     for (p = PyString_AS_STRING(self); p < e; p++)
3412         if (*p == '\t') {
3413             if (tabsize > 0) {
3414                 incr = tabsize - (j % tabsize);
3415                 if (j > PY_SSIZE_T_MAX - incr)
3416                     goto overflow1;
3417                 j += incr;
3418             }
3419         }
3420         else {
3421             if (j > PY_SSIZE_T_MAX - 1)
3422                 goto overflow1;
3423             j++;
3424             if (*p == '\n' || *p == '\r') {
3425                 if (i > PY_SSIZE_T_MAX - j)
3426                     goto overflow1;
3427                 i += j;
3428                 j = 0;
3429             }
3430         }
3431
3432     if (i > PY_SSIZE_T_MAX - j)
3433         goto overflow1;
3434
3435     /* Second pass: create output string and fill it */
3436     u = PyString_FromStringAndSize(NULL, i + j);
3437     if (!u)
3438         return NULL;
3439
3440     j = 0; /* same as in first pass */
3441     q = PyString_AS_STRING(u); /* next output char */
3442     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3443
3444     for (p = PyString_AS_STRING(self); p < e; p++)
3445         if (*p == '\t') {
3446             if (tabsize > 0) {
3447                 i = tabsize - (j % tabsize);
3448                 j += i;
3449                 while (i--) {
3450                     if (q >= qe)
3451                         goto overflow2;
3452                     *q++ = ' ';
3453                 }
3454             }
3455         }
3456         else {
3457             if (q >= qe)
3458                 goto overflow2;
3459             *q++ = *p;
3460             j++;
3461             if (*p == '\n' || *p == '\r')
3462                 j = 0;
3463         }
3464
3465     return u;
3466
3467   overflow2:
3468     Py_DECREF(u);
3469   overflow1:
3470     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3471     return NULL;
3472 }
3473
3474 Py_LOCAL_INLINE(PyObject *)
3475 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3476 {
3477     PyObject *u;
3478
3479     if (left < 0)
3480         left = 0;
3481     if (right < 0)
3482         right = 0;
3483
3484     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3485         Py_INCREF(self);
3486         return (PyObject *)self;
3487     }
3488
3489     u = PyString_FromStringAndSize(NULL,
3490                                    left + PyString_GET_SIZE(self) + right);
3491     if (u) {
3492         if (left)
3493             memset(PyString_AS_STRING(u), fill, left);
3494         Py_MEMCPY(PyString_AS_STRING(u) + left,
3495                PyString_AS_STRING(self),
3496                PyString_GET_SIZE(self));
3497         if (right)
3498             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3499                    fill, right);
3500     }
3501
3502     return u;
3503 }
3504
3505 PyDoc_STRVAR(ljust__doc__,
3506 "S.ljust(width[, fillchar]) -> string\n"
3507 "\n"
3508 "Return S left justified in a string of length width. Padding is\n"
3509 "done using the specified fill character (default is a space).");
3510
3511 static PyObject *
3512 string_ljust(PyStringObject *self, PyObject *args)
3513 {
3514     Py_ssize_t width;
3515     char fillchar = ' ';
3516
3517     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3518         return NULL;
3519
3520     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3521         Py_INCREF(self);
3522         return (PyObject*) self;
3523     }
3524
3525     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3526 }
3527
3528
3529 PyDoc_STRVAR(rjust__doc__,
3530 "S.rjust(width[, fillchar]) -> string\n"
3531 "\n"
3532 "Return S right justified in a string of length width. Padding is\n"
3533 "done using the specified fill character (default is a space)");
3534
3535 static PyObject *
3536 string_rjust(PyStringObject *self, PyObject *args)
3537 {
3538     Py_ssize_t width;
3539     char fillchar = ' ';
3540
3541     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3542         return NULL;
3543
3544     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3545         Py_INCREF(self);
3546         return (PyObject*) self;
3547     }
3548
3549     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3550 }
3551
3552
3553 PyDoc_STRVAR(center__doc__,
3554 "S.center(width[, fillchar]) -> string\n"
3555 "\n"
3556 "Return S centered in a string of length width. Padding is\n"
3557 "done using the specified fill character (default is a space)");
3558
3559 static PyObject *
3560 string_center(PyStringObject *self, PyObject *args)
3561 {
3562     Py_ssize_t marg, left;
3563     Py_ssize_t width;
3564     char fillchar = ' ';
3565
3566     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3567         return NULL;
3568
3569     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3570         Py_INCREF(self);
3571         return (PyObject*) self;
3572     }
3573
3574     marg = width - PyString_GET_SIZE(self);
3575     left = marg / 2 + (marg & width & 1);
3576
3577     return pad(self, left, marg - left, fillchar);
3578 }
3579
3580 PyDoc_STRVAR(zfill__doc__,
3581 "S.zfill(width) -> string\n"
3582 "\n"
3583 "Pad a numeric string S with zeros on the left, to fill a field\n"
3584 "of the specified width.  The string S is never truncated.");
3585
3586 static PyObject *
3587 string_zfill(PyStringObject *self, PyObject *args)
3588 {
3589     Py_ssize_t fill;
3590     PyObject *s;
3591     char *p;
3592     Py_ssize_t width;
3593
3594     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3595         return NULL;
3596
3597     if (PyString_GET_SIZE(self) >= width) {
3598         if (PyString_CheckExact(self)) {
3599             Py_INCREF(self);
3600             return (PyObject*) self;
3601         }
3602         else
3603             return PyString_FromStringAndSize(
3604                 PyString_AS_STRING(self),
3605                 PyString_GET_SIZE(self)
3606             );
3607     }
3608
3609     fill = width - PyString_GET_SIZE(self);
3610
3611     s = pad(self, fill, 0, '0');
3612
3613     if (s == NULL)
3614         return NULL;
3615
3616     p = PyString_AS_STRING(s);
3617     if (p[fill] == '+' || p[fill] == '-') {
3618         /* move sign to beginning of string */
3619         p[0] = p[fill];
3620         p[fill] = '0';
3621     }
3622
3623     return (PyObject*) s;
3624 }
3625
3626 PyDoc_STRVAR(isspace__doc__,
3627 "S.isspace() -> bool\n\
3628 \n\
3629 Return True if all characters in S are whitespace\n\
3630 and there is at least one character in S, False otherwise.");
3631
3632 static PyObject*
3633 string_isspace(PyStringObject *self)
3634 {
3635     register const unsigned char *p
3636         = (unsigned char *) PyString_AS_STRING(self);
3637     register const unsigned char *e;
3638
3639     /* Shortcut for single character strings */
3640     if (PyString_GET_SIZE(self) == 1 &&
3641         isspace(*p))
3642         return PyBool_FromLong(1);
3643
3644     /* Special case for empty strings */
3645     if (PyString_GET_SIZE(self) == 0)
3646         return PyBool_FromLong(0);
3647
3648     e = p + PyString_GET_SIZE(self);
3649     for (; p < e; p++) {
3650         if (!isspace(*p))
3651             return PyBool_FromLong(0);
3652     }
3653     return PyBool_FromLong(1);
3654 }
3655
3656
3657 PyDoc_STRVAR(isalpha__doc__,
3658 "S.isalpha() -> bool\n\
3659 \n\
3660 Return True if all characters in S are alphabetic\n\
3661 and there is at least one character in S, False otherwise.");
3662
3663 static PyObject*
3664 string_isalpha(PyStringObject *self)
3665 {
3666     register const unsigned char *p
3667         = (unsigned char *) PyString_AS_STRING(self);
3668     register const unsigned char *e;
3669
3670     /* Shortcut for single character strings */
3671     if (PyString_GET_SIZE(self) == 1 &&
3672         isalpha(*p))
3673         return PyBool_FromLong(1);
3674
3675     /* Special case for empty strings */
3676     if (PyString_GET_SIZE(self) == 0)
3677         return PyBool_FromLong(0);
3678
3679     e = p + PyString_GET_SIZE(self);
3680     for (; p < e; p++) {
3681         if (!isalpha(*p))
3682             return PyBool_FromLong(0);
3683     }
3684     return PyBool_FromLong(1);
3685 }
3686
3687
3688 PyDoc_STRVAR(isalnum__doc__,
3689 "S.isalnum() -> bool\n\
3690 \n\
3691 Return True if all characters in S are alphanumeric\n\
3692 and there is at least one character in S, False otherwise.");
3693
3694 static PyObject*
3695 string_isalnum(PyStringObject *self)
3696 {
3697     register const unsigned char *p
3698         = (unsigned char *) PyString_AS_STRING(self);
3699     register const unsigned char *e;
3700
3701     /* Shortcut for single character strings */
3702     if (PyString_GET_SIZE(self) == 1 &&
3703         isalnum(*p))
3704         return PyBool_FromLong(1);
3705
3706     /* Special case for empty strings */
3707     if (PyString_GET_SIZE(self) == 0)
3708         return PyBool_FromLong(0);
3709
3710     e = p + PyString_GET_SIZE(self);
3711     for (; p < e; p++) {
3712         if (!isalnum(*p))
3713             return PyBool_FromLong(0);
3714     }
3715     return PyBool_FromLong(1);
3716 }
3717
3718
3719 PyDoc_STRVAR(isdigit__doc__,
3720 "S.isdigit() -> bool\n\
3721 \n\
3722 Return True if all characters in S are digits\n\
3723 and there is at least one character in S, False otherwise.");
3724
3725 static PyObject*
3726 string_isdigit(PyStringObject *self)
3727 {
3728     register const unsigned char *p
3729         = (unsigned char *) PyString_AS_STRING(self);
3730     register const unsigned char *e;
3731
3732     /* Shortcut for single character strings */
3733     if (PyString_GET_SIZE(self) == 1 &&
3734         isdigit(*p))
3735         return PyBool_FromLong(1);
3736
3737     /* Special case for empty strings */
3738     if (PyString_GET_SIZE(self) == 0)
3739         return PyBool_FromLong(0);
3740
3741     e = p + PyString_GET_SIZE(self);
3742     for (; p < e; p++) {
3743         if (!isdigit(*p))
3744             return PyBool_FromLong(0);
3745     }
3746     return PyBool_FromLong(1);
3747 }
3748
3749
3750 PyDoc_STRVAR(islower__doc__,
3751 "S.islower() -> bool\n\
3752 \n\
3753 Return True if all cased characters in S are lowercase and there is\n\
3754 at least one cased character in S, False otherwise.");
3755
3756 static PyObject*
3757 string_islower(PyStringObject *self)
3758 {
3759     register const unsigned char *p
3760         = (unsigned char *) PyString_AS_STRING(self);
3761     register const unsigned char *e;
3762     int cased;
3763
3764     /* Shortcut for single character strings */
3765     if (PyString_GET_SIZE(self) == 1)
3766         return PyBool_FromLong(islower(*p) != 0);
3767
3768     /* Special case for empty strings */
3769     if (PyString_GET_SIZE(self) == 0)
3770         return PyBool_FromLong(0);
3771
3772     e = p + PyString_GET_SIZE(self);
3773     cased = 0;
3774     for (; p < e; p++) {
3775         if (isupper(*p))
3776             return PyBool_FromLong(0);
3777         else if (!cased && islower(*p))
3778             cased = 1;
3779     }
3780     return PyBool_FromLong(cased);
3781 }
3782
3783
3784 PyDoc_STRVAR(isupper__doc__,
3785 "S.isupper() -> bool\n\
3786 \n\
3787 Return True if all cased characters in S are uppercase and there is\n\
3788 at least one cased character in S, False otherwise.");
3789
3790 static PyObject*
3791 string_isupper(PyStringObject *self)
3792 {
3793     register const unsigned char *p
3794         = (unsigned char *) PyString_AS_STRING(self);
3795     register const unsigned char *e;
3796     int cased;
3797
3798     /* Shortcut for single character strings */
3799     if (PyString_GET_SIZE(self) == 1)
3800         return PyBool_FromLong(isupper(*p) != 0);
3801
3802     /* Special case for empty strings */
3803     if (PyString_GET_SIZE(self) == 0)
3804         return PyBool_FromLong(0);
3805
3806     e = p + PyString_GET_SIZE(self);
3807     cased = 0;
3808     for (; p < e; p++) {
3809         if (islower(*p))
3810             return PyBool_FromLong(0);
3811         else if (!cased && isupper(*p))
3812             cased = 1;
3813     }
3814     return PyBool_FromLong(cased);
3815 }
3816
3817
3818 PyDoc_STRVAR(istitle__doc__,
3819 "S.istitle() -> bool\n\
3820 \n\
3821 Return True if S is a titlecased string and there is at least one\n\
3822 character in S, i.e. uppercase characters may only follow uncased\n\
3823 characters and lowercase characters only cased ones. Return False\n\
3824 otherwise.");
3825
3826 static PyObject*
3827 string_istitle(PyStringObject *self, PyObject *uncased)
3828 {
3829     register const unsigned char *p
3830         = (unsigned char *) PyString_AS_STRING(self);
3831     register const unsigned char *e;
3832     int cased, previous_is_cased;
3833
3834     /* Shortcut for single character strings */
3835     if (PyString_GET_SIZE(self) == 1)
3836         return PyBool_FromLong(isupper(*p) != 0);
3837
3838     /* Special case for empty strings */
3839     if (PyString_GET_SIZE(self) == 0)
3840         return PyBool_FromLong(0);
3841
3842     e = p + PyString_GET_SIZE(self);
3843     cased = 0;
3844     previous_is_cased = 0;
3845     for (; p < e; p++) {
3846         register const unsigned char ch = *p;
3847
3848         if (isupper(ch)) {
3849             if (previous_is_cased)
3850                 return PyBool_FromLong(0);
3851             previous_is_cased = 1;
3852             cased = 1;
3853         }
3854         else if (islower(ch)) {
3855             if (!previous_is_cased)
3856                 return PyBool_FromLong(0);
3857             previous_is_cased = 1;
3858             cased = 1;
3859         }
3860         else
3861             previous_is_cased = 0;
3862     }
3863     return PyBool_FromLong(cased);
3864 }
3865
3866
3867 PyDoc_STRVAR(splitlines__doc__,
3868 "S.splitlines([keepends]) -> list of strings\n\
3869 \n\
3870 Return a list of the lines in S, breaking at line boundaries.\n\
3871 Line breaks are not included in the resulting list unless keepends\n\
3872 is given and true.");
3873
3874 static PyObject*
3875 string_splitlines(PyStringObject *self, PyObject *args)
3876 {
3877     register Py_ssize_t i;
3878     register Py_ssize_t j;
3879     Py_ssize_t len;
3880     int keepends = 0;
3881     PyObject *list;
3882     PyObject *str;
3883     char *data;
3884
3885     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3886         return NULL;
3887
3888     data = PyString_AS_STRING(self);
3889     len = PyString_GET_SIZE(self);
3890
3891     /* This does not use the preallocated list because splitlines is
3892        usually run with hundreds of newlines.  The overhead of
3893        switching between PyList_SET_ITEM and append causes about a
3894        2-3% slowdown for that common case.  A smarter implementation
3895        could move the if check out, so the SET_ITEMs are done first
3896        and the appends only done when the prealloc buffer is full.
3897        That's too much work for little gain.*/
3898
3899     list = PyList_New(0);
3900     if (!list)
3901         goto onError;
3902
3903     for (i = j = 0; i < len; ) {
3904         Py_ssize_t eol;
3905
3906         /* Find a line and append it */
3907         while (i < len && data[i] != '\n' && data[i] != '\r')
3908             i++;
3909
3910         /* Skip the line break reading CRLF as one line break */
3911         eol = i;
3912         if (i < len) {
3913             if (data[i] == '\r' && i + 1 < len &&
3914                 data[i+1] == '\n')
3915                 i += 2;
3916             else
3917                 i++;
3918             if (keepends)
3919                 eol = i;
3920         }
3921         SPLIT_APPEND(data, j, eol);
3922         j = i;
3923     }
3924     if (j < len) {
3925         SPLIT_APPEND(data, j, len);
3926     }
3927
3928     return list;
3929
3930  onError:
3931     Py_XDECREF(list);
3932     return NULL;
3933 }
3934
3935 PyDoc_STRVAR(sizeof__doc__,
3936 "S.__sizeof__() -> size of S in memory, in bytes");
3937
3938 static PyObject *
3939 string_sizeof(PyStringObject *v)
3940 {
3941         Py_ssize_t res;
3942         res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3943         return PyInt_FromSsize_t(res);
3944 }
3945
3946 #undef SPLIT_APPEND
3947 #undef SPLIT_ADD
3948 #undef MAX_PREALLOC
3949 #undef PREALLOC_SIZE
3950
3951 static PyObject *
3952 string_getnewargs(PyStringObject *v)
3953 {
3954         return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3955 }
3956
3957
3958 #include "stringlib/string_format.h"
3959
3960 PyDoc_STRVAR(format__doc__,
3961 "S.format(*args, **kwargs) -> unicode\n\
3962 \n\
3963 ");
3964
3965 static PyObject *
3966 string__format__(PyObject* self, PyObject* args)
3967 {
3968     PyObject *format_spec;
3969     PyObject *result = NULL;
3970     PyObject *tmp = NULL;
3971
3972     /* If 2.x, convert format_spec to the same type as value */
3973     /* This is to allow things like u''.format('') */
3974     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3975         goto done;
3976     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3977         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3978                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3979         goto done;
3980     }
3981     tmp = PyObject_Str(format_spec);
3982     if (tmp == NULL)
3983         goto done;
3984     format_spec = tmp;
3985
3986     result = _PyBytes_FormatAdvanced(self,
3987                                      PyString_AS_STRING(format_spec),
3988                                      PyString_GET_SIZE(format_spec));
3989 done:
3990     Py_XDECREF(tmp);
3991     return result;
3992 }
3993
3994 PyDoc_STRVAR(p_format__doc__,
3995 "S.__format__(format_spec) -> unicode\n\
3996 \n\
3997 ");
3998
3999 \f
/* Method table for str objects; installed as tp_methods of PyString_Type.
   Each entry is {name, C function, calling-convention flags, docstring}.
   Entries that give only three values leave the docstring pointer
   zero-initialised.  The table ends with a NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
        /* Counterparts of the obsolete stropmodule functions; except
           string.maketrans(). */
        {"join", (PyCFunction)string_join, METH_O, join__doc__},
        {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
        {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
        {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
        {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
        {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
        {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
        {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
        {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
        {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
        {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
        {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
        {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
         capitalize__doc__},
        {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
        {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
         endswith__doc__},
        {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
        {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
        {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
        {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
        {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
        {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
        {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
        {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
        {"rpartition", (PyCFunction)string_rpartition, METH_O,
         rpartition__doc__},
        {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
         startswith__doc__},
        {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
        {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
         swapcase__doc__},
        {"translate", (PyCFunction)string_translate, METH_VARARGS,
         translate__doc__},
        {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
        {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
        {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
        {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
        {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
        /* New-style formatting support; do_string_format and the
           formatter_* helpers are not defined in this part of the file
           (NOTE(review): presumably they come from the
           stringlib/string_format.h include above -- confirm). */
        {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
        {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
        {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
        {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
        {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
        {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
        {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
         expandtabs__doc__},
        {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
         splitlines__doc__},
        {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
         sizeof__doc__},
        {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
        {NULL,     NULL}                     /* sentinel */
};
4058
4059 static PyObject *
4060 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4061
4062 static PyObject *
4063 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4064 {
4065         PyObject *x = NULL;
4066         static char *kwlist[] = {"object", 0};
4067
4068         if (type != &PyString_Type)
4069                 return str_subtype_new(type, args, kwds);
4070         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4071                 return NULL;
4072         if (x == NULL)
4073                 return PyString_FromString("");
4074         return PyObject_Str(x);
4075 }
4076
4077 static PyObject *
4078 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4079 {
4080         PyObject *tmp, *pnew;
4081         Py_ssize_t n;
4082
4083         assert(PyType_IsSubtype(type, &PyString_Type));
4084         tmp = string_new(&PyString_Type, args, kwds);
4085         if (tmp == NULL)
4086                 return NULL;
4087         assert(PyString_CheckExact(tmp));
4088         n = PyString_GET_SIZE(tmp);
4089         pnew = type->tp_alloc(type, n);
4090         if (pnew != NULL) {
4091                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4092                 ((PyStringObject *)pnew)->ob_shash =
4093                         ((PyStringObject *)tmp)->ob_shash;
4094                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4095         }
4096         Py_DECREF(tmp);
4097         return pnew;
4098 }
4099
4100 static PyObject *
4101 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4102 {
4103         PyErr_SetString(PyExc_TypeError,
4104                         "The basestring type cannot be instantiated");
4105         return NULL;
4106 }
4107
4108 static PyObject *
4109 string_mod(PyObject *v, PyObject *w)
4110 {
4111         if (!PyString_Check(v)) {
4112                 Py_INCREF(Py_NotImplemented);
4113                 return Py_NotImplemented;
4114         }
4115         return PyString_Format(v, w);
4116 }
4117
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4120
/* Number protocol table for str, installed as tp_as_number of
   PyString_Type.  Only nb_remainder is filled in (with string_mod); the
   slots after it are omitted from the initializer and therefore
   zero-initialised. */
static PyNumberMethods string_as_number = {
        0,                      /*nb_add*/
        0,                      /*nb_subtract*/
        0,                      /*nb_multiply*/
        0,                      /*nb_divide*/
        string_mod,             /*nb_remainder*/
};
4128
4129
/* Type object for "basestring".  It derives from PyBaseObject_Type, may
   itself be subclassed (Py_TPFLAGS_BASETYPE), and is used as tp_base of
   PyString_Type.  Its only non-trivial slot is tp_new, which is
   basestring_new. */
PyTypeObject PyBaseString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "basestring",                           /* tp_name */
        0,                                      /* tp_basicsize */
        0,                                      /* tp_itemsize */
        0,                                      /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        0,                                      /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        0,                                      /* tp_getattro */
        0,                                      /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        basestring_doc,                         /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        0,                                      /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseObject_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        basestring_new,                         /* tp_new */
        0,                                      /* tp_free */
};
4171
/* Docstring installed as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for "str".  Instances are variable-sized: the basic size is
   sizeof(PyStringObject) and each item is one char.  The type derives
   from PyBaseString_Type, can be subclassed, and wires in the method
   table (string_methods), the number/sequence/mapping/buffer protocol
   tables, and string_new as its constructor. */
PyTypeObject PyString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "str",                                  /* tp_name */
        sizeof(PyStringObject),                 /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        string_dealloc,                         /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        string_repr,                            /* tp_repr */
        &string_as_number,                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        &string_as_mapping,                     /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        string_str,                             /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
                Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
                Py_TPFLAGS_HAVE_NEWBUFFER,      /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseString_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        PyObject_Del,                           /* tp_free */
};
4221
4222 void
4223 PyString_Concat(register PyObject **pv, register PyObject *w)
4224 {
4225         register PyObject *v;
4226         if (*pv == NULL)
4227                 return;
4228         if (w == NULL || !PyString_Check(*pv)) {
4229                 Py_DECREF(*pv);
4230                 *pv = NULL;
4231                 return;
4232         }
4233         v = string_concat((PyStringObject *) *pv, w);
4234         Py_DECREF(*pv);
4235         *pv = v;
4236 }
4237
4238 void
4239 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4240 {
4241         PyString_Concat(pv, w);
4242         Py_XDECREF(w);
4243 }
4244
4245
4246 /* The following function breaks the notion that strings are immutable:
4247    it changes the size of a string.  We get away with this only if there
4248    is only one module referencing the object.  You can also think of it
4249    as creating a new string object and destroying the old one, only
4250    more efficiently.  In any case, don't use this if the string may
4251    already be known to some other part of the code...
4252    Note that if there's not enough memory to resize the string, the original
4253    string object at *pv is deallocated, *pv is set to NULL, an "out of
4254    memory" exception is set, and -1 is returned.  Else (on success) 0 is
4255    returned, and the value in *pv may or may not be the same as on input.
4256    As always, an extra byte is allocated for a trailing \0 byte (newsize
4257    does *not* include that), and a trailing \0 byte is stored.
4258 */
4259
4260 int
4261 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4262 {
4263         register PyObject *v;
4264         register PyStringObject *sv;
4265         v = *pv;
4266         if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4267             PyString_CHECK_INTERNED(v)) {
4268                 *pv = 0;
4269                 Py_DECREF(v);
4270                 PyErr_BadInternalCall();
4271                 return -1;
4272         }
4273         /* XXX UNREF/NEWREF interface should be more symmetrical */
4274         _Py_DEC_REFTOTAL;
4275         _Py_ForgetReference(v);
4276         *pv = (PyObject *)
4277                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4278         if (*pv == NULL) {
4279                 PyObject_Del(v);
4280                 PyErr_NoMemory();
4281                 return -1;
4282         }
4283         _Py_NewReference(*pv);
4284         sv = (PyStringObject *) *pv;
4285         Py_SIZE(sv) = newsize;
4286         sv->ob_sval[newsize] = '\0';
4287         sv->ob_shash = -1;      /* invalidate cached hash value */
4288         return 0;
4289 }
4290
4291 /* Helpers for formatstring */
4292
4293 Py_LOCAL_INLINE(PyObject *)
4294 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4295 {
4296         Py_ssize_t argidx = *p_argidx;
4297         if (argidx < arglen) {
4298                 (*p_argidx)++;
4299                 if (arglen < 0)
4300                         return args;
4301                 else
4302                         return PyTuple_GetItem(args, argidx);
4303         }
4304         PyErr_SetString(PyExc_TypeError,
4305                         "not enough arguments for format string");
4306         return NULL;
4307 }
4308
/* Format flag bits.  Each bit records one flag character seen in a
 * conversion specification; the character is noted next to the macro.
 */
#define F_LJUST (1 << 0)        /* '-' */
#define F_SIGN  (1 << 1)        /* '+' */
#define F_BLANK (1 << 2)        /* ' ' */
#define F_ALT   (1 << 3)        /* '#' */
#define F_ZERO  (1 << 4)        /* '0' */
4321
4322 Py_LOCAL_INLINE(int)
4323 formatfloat(char *buf, size_t buflen, int flags,
4324             int prec, int type, PyObject *v)
4325 {
4326         /* fmt = '%#.' + `prec` + `type`
4327            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4328         char fmt[20];
4329         double x;
4330         x = PyFloat_AsDouble(v);
4331         if (x == -1.0 && PyErr_Occurred()) {
4332                 PyErr_Format(PyExc_TypeError, "float argument required, "
4333                              "not %.200s", Py_TYPE(v)->tp_name);
4334                 return -1;
4335         }
4336         if (prec < 0)
4337                 prec = 6;
4338         if (type == 'f' && fabs(x)/1e25 >= 1e25)
4339                 type = 'g';
4340         /* Worst case length calc to ensure no buffer overrun:
4341
4342            'g' formats:
4343              fmt = %#.<prec>g
4344              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4345                 for any double rep.)
4346              len = 1 + prec + 1 + 2 + 5 = 9 + prec
4347
4348            'f' formats:
4349              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4350              len = 1 + 50 + 1 + prec = 52 + prec
4351
4352            If prec=0 the effective precision is 1 (the leading digit is
4353            always given), therefore increase the length by one.
4354
4355         */
4356         if (((type == 'g' || type == 'G') &&
4357               buflen <= (size_t)10 + (size_t)prec) ||
4358             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4359                 PyErr_SetString(PyExc_OverflowError,
4360                         "formatted float is too long (precision too large?)");
4361                 return -1;
4362         }
4363         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4364                       (flags&F_ALT) ? "#" : "",
4365                       prec, type);
4366         PyOS_ascii_formatd(buf, buflen, fmt, x);
4367         return (int)strlen(buf);
4368 }
4369
4370 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4371  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4372  * Python's regular ints.
4373  * Return value:  a new PyString*, or NULL if error.
4374  *  .  *pbuf is set to point into it,
4375  *     *plen set to the # of chars following that.
4376  *     Caller must decref it when done using pbuf.
4377  *     The string starting at *pbuf is of the form
4378  *         "-"? ("0x" | "0X")? digit+
4379  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4380  *         set in flags.  The case of hex digits will be correct,
4381  *     There will be at least prec digits, zero-filled on the left if
4382  *         necessary to get that many.
4383  * val          object to be converted
4384  * flags        bitmask of format flags; only F_ALT is looked at
4385  * prec         minimum number of digits; 0-fill on left if needed
4386  * type         a character in [duoxX]; u acts the same as d
4387  *
4388  * CAUTION:  o, x and X conversions on regular ints can never
4389  * produce a '-' sign, but can for Python's unbounded ints.
4390  */
4391 PyObject*
4392 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4393                      char **pbuf, int *plen)
4394 {
4395         PyObject *result = NULL;
4396         char *buf;
4397         Py_ssize_t i;
4398         int sign;       /* 1 if '-', else 0 */
4399         int len;        /* number of characters */
4400         Py_ssize_t llen;
4401         int numdigits;  /* len == numnondigits + numdigits */
4402         int numnondigits = 0;
4403
4404         switch (type) {
4405         case 'd':
4406         case 'u':
4407                 result = Py_TYPE(val)->tp_str(val);
4408                 break;
4409         case 'o':
4410                 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4411                 break;
4412         case 'x':
4413         case 'X':
4414                 numnondigits = 2;
4415                 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4416                 break;
4417         default:
4418                 assert(!"'type' not in [duoxX]");
4419         }
4420         if (!result)
4421                 return NULL;
4422
4423         buf = PyString_AsString(result);
4424         if (!buf) {
4425                 Py_DECREF(result);
4426                 return NULL;
4427         }
4428
4429         /* To modify the string in-place, there can only be one reference. */
4430         if (Py_REFCNT(result) != 1) {
4431                 PyErr_BadInternalCall();
4432                 return NULL;
4433         }
4434         llen = PyString_Size(result);
4435         if (llen > INT_MAX) {
4436                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4437                 return NULL;
4438         }
4439         len = (int)llen;
4440         if (buf[len-1] == 'L') {
4441                 --len;
4442                 buf[len] = '\0';
4443         }
4444         sign = buf[0] == '-';
4445         numnondigits += sign;
4446         numdigits = len - numnondigits;
4447         assert(numdigits > 0);
4448
4449         /* Get rid of base marker unless F_ALT */
4450         if ((flags & F_ALT) == 0) {
4451                 /* Need to skip 0x, 0X or 0. */
4452                 int skipped = 0;
4453                 switch (type) {
4454                 case 'o':
4455                         assert(buf[sign] == '0');
4456                         /* If 0 is only digit, leave it alone. */
4457                         if (numdigits > 1) {
4458                                 skipped = 1;
4459                                 --numdigits;
4460                         }
4461                         break;
4462                 case 'x':
4463                 case 'X':
4464                         assert(buf[sign] == '0');
4465                         assert(buf[sign + 1] == 'x');
4466                         skipped = 2;
4467                         numnondigits -= 2;
4468                         break;
4469                 }
4470                 if (skipped) {
4471                         buf += skipped;
4472                         len -= skipped;
4473                         if (sign)
4474                                 buf[0] = '-';
4475                 }
4476                 assert(len == numnondigits + numdigits);
4477                 assert(numdigits > 0);
4478         }
4479
4480         /* Fill with leading zeroes to meet minimum width. */
4481         if (prec > numdigits) {
4482                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4483                                         numnondigits + prec);
4484                 char *b1;
4485                 if (!r1) {
4486                         Py_DECREF(result);
4487                         return NULL;
4488                 }
4489                 b1 = PyString_AS_STRING(r1);
4490                 for (i = 0; i < numnondigits; ++i)
4491                         *b1++ = *buf++;
4492                 for (i = 0; i < prec - numdigits; i++)
4493                         *b1++ = '0';
4494                 for (i = 0; i < numdigits; i++)
4495                         *b1++ = *buf++;
4496                 *b1 = '\0';
4497                 Py_DECREF(result);
4498                 result = r1;
4499                 buf = PyString_AS_STRING(result);
4500                 len = numnondigits + prec;
4501         }
4502
4503         /* Fix up case for hex conversions. */
4504         if (type == 'X') {
4505                 /* Need to convert all lower case letters to upper case.
4506                    and need to convert 0x to 0X (and -0x to -0X). */
4507                 for (i = 0; i < len; i++)
4508                         if (buf[i] >= 'a' && buf[i] <= 'x')
4509                                 buf[i] -= 'a'-'A';
4510         }
4511         *pbuf = buf;
4512         *plen = len;
4513         return result;
4514 }
4515
4516 Py_LOCAL_INLINE(int)
4517 formatint(char *buf, size_t buflen, int flags,
4518           int prec, int type, PyObject *v)
4519 {
4520         /* fmt = '%#.' + `prec` + 'l' + `type`
4521            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4522            + 1 + 1 = 24 */
4523         char fmt[64];   /* plenty big enough! */
4524         char *sign;
4525         long x;
4526
4527         x = PyInt_AsLong(v);
4528         if (x == -1 && PyErr_Occurred()) {
4529                 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4530                              Py_TYPE(v)->tp_name);
4531                 return -1;
4532         }
4533         if (x < 0 && type == 'u') {
4534                 type = 'd';
4535         }
4536         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4537                 sign = "-";
4538         else
4539                 sign = "";
4540         if (prec < 0)
4541                 prec = 1;
4542
4543         if ((flags & F_ALT) &&
4544             (type == 'x' || type == 'X')) {
4545                 /* When converting under %#x or %#X, there are a number
4546                  * of issues that cause pain:
4547                  * - when 0 is being converted, the C standard leaves off
4548                  *   the '0x' or '0X', which is inconsistent with other
4549                  *   %#x/%#X conversions and inconsistent with Python's
4550                  *   hex() function
4551                  * - there are platforms that violate the standard and
4552                  *   convert 0 with the '0x' or '0X'
4553                  *   (Metrowerks, Compaq Tru64)
4554                  * - there are platforms that give '0x' when converting
4555                  *   under %#X, but convert 0 in accordance with the
4556                  *   standard (OS/2 EMX)
4557                  *
4558                  * We can achieve the desired consistency by inserting our
4559                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4560                  * of %#x/%#X.
4561                  *
4562                  * Note that this is the same approach as used in
4563                  * formatint() in unicodeobject.c
4564                  */
4565                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4566                               sign, type, prec, type);
4567         }
4568         else {
4569                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4570                               sign, (flags&F_ALT) ? "#" : "",
4571                               prec, type);
4572         }
4573
4574         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4575          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4576          */
4577         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4578                 PyErr_SetString(PyExc_OverflowError,
4579                     "formatted integer is too long (precision too large?)");
4580                 return -1;
4581         }
4582         if (sign[0])
4583                 PyOS_snprintf(buf, buflen, fmt, -x);
4584         else
4585                 PyOS_snprintf(buf, buflen, fmt, x);
4586         return (int)strlen(buf);
4587 }
4588
4589 Py_LOCAL_INLINE(int)
4590 formatchar(char *buf, size_t buflen, PyObject *v)
4591 {
4592         /* presume that the buffer is at least 2 characters long */
4593         if (PyString_Check(v)) {
4594                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4595                         return -1;
4596         }
4597         else {
4598                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4599                         return -1;
4600         }
4601         buf[1] = '\0';
4602         return 1;
4603 }
4604
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
4614
4615 PyObject *
4616 PyString_Format(PyObject *format, PyObject *args)
4617 {
4618         char *fmt, *res;
4619         Py_ssize_t arglen, argidx;
4620         Py_ssize_t reslen, rescnt, fmtcnt;
4621         int args_owned = 0;
4622         PyObject *result, *orig_args;
4623 #ifdef Py_USING_UNICODE
4624         PyObject *v, *w;
4625 #endif
4626         PyObject *dict = NULL;
4627         if (format == NULL || !PyString_Check(format) || args == NULL) {
4628                 PyErr_BadInternalCall();
4629                 return NULL;
4630         }
4631         orig_args = args;
4632         fmt = PyString_AS_STRING(format);
4633         fmtcnt = PyString_GET_SIZE(format);
4634         reslen = rescnt = fmtcnt + 100;
4635         result = PyString_FromStringAndSize((char *)NULL, reslen);
4636         if (result == NULL)
4637                 return NULL;
4638         res = PyString_AsString(result);
4639         if (PyTuple_Check(args)) {
4640                 arglen = PyTuple_GET_SIZE(args);
4641                 argidx = 0;
4642         }
4643         else {
4644                 arglen = -1;
4645                 argidx = -2;
4646         }
4647         if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4648             !PyObject_TypeCheck(args, &PyBaseString_Type))
4649                 dict = args;
4650         while (--fmtcnt >= 0) {
4651                 if (*fmt != '%') {
4652                         if (--rescnt < 0) {
4653                                 rescnt = fmtcnt + 100;
4654                                 reslen += rescnt;
4655                                 if (_PyString_Resize(&result, reslen) < 0)
4656                                         return NULL;
4657                                 res = PyString_AS_STRING(result)
4658                                         + reslen - rescnt;
4659                                 --rescnt;
4660                         }
4661                         *res++ = *fmt++;
4662                 }
4663                 else {
4664                         /* Got a format specifier */
4665                         int flags = 0;
4666                         Py_ssize_t width = -1;
4667                         int prec = -1;
4668                         int c = '\0';
4669                         int fill;
4670                         int isnumok;
4671                         PyObject *v = NULL;
4672                         PyObject *temp = NULL;
4673                         char *pbuf;
4674                         int sign;
4675                         Py_ssize_t len;
4676                         char formatbuf[FORMATBUFLEN];
4677                              /* For format{float,int,char}() */
4678 #ifdef Py_USING_UNICODE
4679                         char *fmt_start = fmt;
4680                         Py_ssize_t argidx_start = argidx;
4681 #endif
4682
4683                         fmt++;
4684                         if (*fmt == '(') {
4685                                 char *keystart;
4686                                 Py_ssize_t keylen;
4687                                 PyObject *key;
4688                                 int pcount = 1;
4689
4690                                 if (dict == NULL) {
4691                                         PyErr_SetString(PyExc_TypeError,
4692                                                  "format requires a mapping");
4693                                         goto error;
4694                                 }
4695                                 ++fmt;
4696                                 --fmtcnt;
4697                                 keystart = fmt;
4698                                 /* Skip over balanced parentheses */
4699                                 while (pcount > 0 && --fmtcnt >= 0) {
4700                                         if (*fmt == ')')
4701                                                 --pcount;
4702                                         else if (*fmt == '(')
4703                                                 ++pcount;
4704                                         fmt++;
4705                                 }
4706                                 keylen = fmt - keystart - 1;
4707                                 if (fmtcnt < 0 || pcount > 0) {
4708                                         PyErr_SetString(PyExc_ValueError,
4709                                                    "incomplete format key");
4710                                         goto error;
4711                                 }
4712                                 key = PyString_FromStringAndSize(keystart,
4713                                                                  keylen);
4714                                 if (key == NULL)
4715                                         goto error;
4716                                 if (args_owned) {
4717                                         Py_DECREF(args);
4718                                         args_owned = 0;
4719                                 }
4720                                 args = PyObject_GetItem(dict, key);
4721                                 Py_DECREF(key);
4722                                 if (args == NULL) {
4723                                         goto error;
4724                                 }
4725                                 args_owned = 1;
4726                                 arglen = -1;
4727                                 argidx = -2;
4728                         }
4729                         while (--fmtcnt >= 0) {
4730                                 switch (c = *fmt++) {
4731                                 case '-': flags |= F_LJUST; continue;
4732                                 case '+': flags |= F_SIGN; continue;
4733                                 case ' ': flags |= F_BLANK; continue;
4734                                 case '#': flags |= F_ALT; continue;
4735                                 case '0': flags |= F_ZERO; continue;
4736                                 }
4737                                 break;
4738                         }
4739                         if (c == '*') {
4740                                 v = getnextarg(args, arglen, &argidx);
4741                                 if (v == NULL)
4742                                         goto error;
4743                                 if (!PyInt_Check(v)) {
4744                                         PyErr_SetString(PyExc_TypeError,
4745                                                         "* wants int");
4746                                         goto error;
4747                                 }
4748                                 width = PyInt_AsLong(v);
4749                                 if (width < 0) {
4750                                         flags |= F_LJUST;
4751                                         width = -width;
4752                                 }
4753                                 if (--fmtcnt >= 0)
4754                                         c = *fmt++;
4755                         }
4756                         else if (c >= 0 && isdigit(c)) {
4757                                 width = c - '0';
4758                                 while (--fmtcnt >= 0) {
4759                                         c = Py_CHARMASK(*fmt++);
4760                                         if (!isdigit(c))
4761                                                 break;
4762                                         if ((width*10) / 10 != width) {
4763                                                 PyErr_SetString(
4764                                                         PyExc_ValueError,
4765                                                         "width too big");
4766                                                 goto error;
4767                                         }
4768                                         width = width*10 + (c - '0');
4769                                 }
4770                         }
4771                         if (c == '.') {
4772                                 prec = 0;
4773                                 if (--fmtcnt >= 0)
4774                                         c = *fmt++;
4775                                 if (c == '*') {
4776                                         v = getnextarg(args, arglen, &argidx);
4777                                         if (v == NULL)
4778                                                 goto error;
4779                                         if (!PyInt_Check(v)) {
4780                                                 PyErr_SetString(
4781                                                         PyExc_TypeError,
4782                                                         "* wants int");
4783                                                 goto error;
4784                                         }
4785                                         prec = PyInt_AsLong(v);
4786                                         if (prec < 0)
4787                                                 prec = 0;
4788                                         if (--fmtcnt >= 0)
4789                                                 c = *fmt++;
4790                                 }
4791                                 else if (c >= 0 && isdigit(c)) {
4792                                         prec = c - '0';
4793                                         while (--fmtcnt >= 0) {
4794                                                 c = Py_CHARMASK(*fmt++);
4795                                                 if (!isdigit(c))
4796                                                         break;
4797                                                 if ((prec*10) / 10 != prec) {
4798                                                         PyErr_SetString(
4799                                                             PyExc_ValueError,
4800                                                             "prec too big");
4801                                                         goto error;
4802                                                 }
4803                                                 prec = prec*10 + (c - '0');
4804                                         }
4805                                 }
4806                         } /* prec */
4807                         if (fmtcnt >= 0) {
4808                                 if (c == 'h' || c == 'l' || c == 'L') {
4809                                         if (--fmtcnt >= 0)
4810                                                 c = *fmt++;
4811                                 }
4812                         }
4813                         if (fmtcnt < 0) {
4814                                 PyErr_SetString(PyExc_ValueError,
4815                                                 "incomplete format");
4816                                 goto error;
4817                         }
4818                         if (c != '%') {
4819                                 v = getnextarg(args, arglen, &argidx);
4820                                 if (v == NULL)
4821                                         goto error;
4822                         }
4823                         sign = 0;
4824                         fill = ' ';
4825                         switch (c) {
4826                         case '%':
4827                                 pbuf = "%";
4828                                 len = 1;
4829                                 break;
4830                         case 's':
4831 #ifdef Py_USING_UNICODE
4832                                 if (PyUnicode_Check(v)) {
4833                                         fmt = fmt_start;
4834                                         argidx = argidx_start;
4835                                         goto unicode;
4836                                 }
4837 #endif
4838                                 temp = _PyObject_Str(v);
4839 #ifdef Py_USING_UNICODE
4840                                 if (temp != NULL && PyUnicode_Check(temp)) {
4841                                         Py_DECREF(temp);
4842                                         fmt = fmt_start;
4843                                         argidx = argidx_start;
4844                                         goto unicode;
4845                                 }
4846 #endif
4847                                 /* Fall through */
4848                         case 'r':
4849                                 if (c == 'r')
4850                                         temp = PyObject_Repr(v);
4851                                 if (temp == NULL)
4852                                         goto error;
4853                                 if (!PyString_Check(temp)) {
4854                                         PyErr_SetString(PyExc_TypeError,
4855                                           "%s argument has non-string str()");
4856                                         Py_DECREF(temp);
4857                                         goto error;
4858                                 }
4859                                 pbuf = PyString_AS_STRING(temp);
4860                                 len = PyString_GET_SIZE(temp);
4861                                 if (prec >= 0 && len > prec)
4862                                         len = prec;
4863                                 break;
4864                         case 'i':
4865                         case 'd':
4866                         case 'u':
4867                         case 'o':
4868                         case 'x':
4869                         case 'X':
4870                                 if (c == 'i')
4871                                         c = 'd';
4872                                 isnumok = 0;
4873                                 if (PyNumber_Check(v)) {
4874                                         PyObject *iobj=NULL;
4875
4876                                         if (PyInt_Check(v) || (PyLong_Check(v))) {
4877                                                 iobj = v;
4878                                                 Py_INCREF(iobj);
4879                                         }
4880                                         else {
4881                                                 iobj = PyNumber_Int(v);
4882                                                 if (iobj==NULL) iobj = PyNumber_Long(v);
4883                                         }
4884                                         if (iobj!=NULL) {
4885                                                 if (PyInt_Check(iobj)) {
4886                                                         isnumok = 1;
4887                                                         pbuf = formatbuf;
4888                                                         len = formatint(pbuf,
4889                                                                         sizeof(formatbuf),
4890                                                                         flags, prec, c, iobj);
4891                                                         Py_DECREF(iobj);
4892                                                         if (len < 0)
4893                                                                 goto error;
4894                                                         sign = 1;
4895                                                 }
4896                                                 else if (PyLong_Check(iobj)) {
4897                                                         int ilen;
4898
4899                                                         isnumok = 1;
4900                                                         temp = _PyString_FormatLong(iobj, flags,
4901                                                                 prec, c, &pbuf, &ilen);
4902                                                         Py_DECREF(iobj);
4903                                                         len = ilen;
4904                                                         if (!temp)
4905                                                                 goto error;
4906                                                         sign = 1;
4907                                                 }
4908                                                 else {
4909                                                         Py_DECREF(iobj);
4910                                                 }
4911                                         }
4912                                 }
4913                                 if (!isnumok) {
4914                                         PyErr_Format(PyExc_TypeError,
4915                                             "%%%c format: a number is required, "
4916                                             "not %.200s", c, Py_TYPE(v)->tp_name);
4917                                         goto error;
4918                                 }
4919                                 if (flags & F_ZERO)
4920                                         fill = '0';
4921                                 break;
4922                         case 'e':
4923                         case 'E':
4924                         case 'f':
4925                         case 'F':
4926                         case 'g':
4927                         case 'G':
4928                                 if (c == 'F')
4929                                         c = 'f';
4930                                 pbuf = formatbuf;
4931                                 len = formatfloat(pbuf, sizeof(formatbuf),
4932                                                   flags, prec, c, v);
4933                                 if (len < 0)
4934                                         goto error;
4935                                 sign = 1;
4936                                 if (flags & F_ZERO)
4937                                         fill = '0';
4938                                 break;
4939                         case 'c':
4940 #ifdef Py_USING_UNICODE
4941                                 if (PyUnicode_Check(v)) {
4942                                         fmt = fmt_start;
4943                                         argidx = argidx_start;
4944                                         goto unicode;
4945                                 }
4946 #endif
4947                                 pbuf = formatbuf;
4948                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4949                                 if (len < 0)
4950                                         goto error;
4951                                 break;
4952                         default:
4953                                 PyErr_Format(PyExc_ValueError,
4954                                   "unsupported format character '%c' (0x%x) "
4955                                   "at index %zd",
4956                                   c, c,
4957                                   (Py_ssize_t)(fmt - 1 -
4958                                                PyString_AsString(format)));
4959                                 goto error;
4960                         }
4961                         if (sign) {
4962                                 if (*pbuf == '-' || *pbuf == '+') {
4963                                         sign = *pbuf++;
4964                                         len--;
4965                                 }
4966                                 else if (flags & F_SIGN)
4967                                         sign = '+';
4968                                 else if (flags & F_BLANK)
4969                                         sign = ' ';
4970                                 else
4971                                         sign = 0;
4972                         }
4973                         if (width < len)
4974                                 width = len;
4975                         if (rescnt - (sign != 0) < width) {
4976                                 reslen -= rescnt;
4977                                 rescnt = width + fmtcnt + 100;
4978                                 reslen += rescnt;
4979                                 if (reslen < 0) {
4980                                         Py_DECREF(result);
4981                                         Py_XDECREF(temp);
4982                                         return PyErr_NoMemory();
4983                                 }
4984                                 if (_PyString_Resize(&result, reslen) < 0) {
4985                                         Py_XDECREF(temp);
4986                                         return NULL;
4987                                 }
4988                                 res = PyString_AS_STRING(result)
4989                                         + reslen - rescnt;
4990                         }
4991                         if (sign) {
4992                                 if (fill != ' ')
4993                                         *res++ = sign;
4994                                 rescnt--;
4995                                 if (width > len)
4996                                         width--;
4997                         }
4998                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4999                                 assert(pbuf[0] == '0');
5000                                 assert(pbuf[1] == c);
5001                                 if (fill != ' ') {
5002                                         *res++ = *pbuf++;
5003                                         *res++ = *pbuf++;
5004                                 }
5005                                 rescnt -= 2;
5006                                 width -= 2;
5007                                 if (width < 0)
5008                                         width = 0;
5009                                 len -= 2;
5010                         }
5011                         if (width > len && !(flags & F_LJUST)) {
5012                                 do {
5013                                         --rescnt;
5014                                         *res++ = fill;
5015                                 } while (--width > len);
5016                         }
5017                         if (fill == ' ') {
5018                                 if (sign)
5019                                         *res++ = sign;
5020                                 if ((flags & F_ALT) &&
5021                                     (c == 'x' || c == 'X')) {
5022                                         assert(pbuf[0] == '0');
5023                                         assert(pbuf[1] == c);
5024                                         *res++ = *pbuf++;
5025                                         *res++ = *pbuf++;
5026                                 }
5027                         }
5028                         Py_MEMCPY(res, pbuf, len);
5029                         res += len;
5030                         rescnt -= len;
5031                         while (--width >= len) {
5032                                 --rescnt;
5033                                 *res++ = ' ';
5034                         }
5035                         if (dict && (argidx < arglen) && c != '%') {
5036                                 PyErr_SetString(PyExc_TypeError,
5037                                            "not all arguments converted during string formatting");
5038                                 Py_XDECREF(temp);
5039                                 goto error;
5040                         }
5041                         Py_XDECREF(temp);
5042                 } /* '%' */
5043         } /* until end */
5044         if (argidx < arglen && !dict) {
5045                 PyErr_SetString(PyExc_TypeError,
5046                                 "not all arguments converted during string formatting");
5047                 goto error;
5048         }
5049         if (args_owned) {
5050                 Py_DECREF(args);
5051         }
5052         _PyString_Resize(&result, reslen - rescnt);
5053         return result;
5054
5055 #ifdef Py_USING_UNICODE
5056  unicode:
5057         if (args_owned) {
5058                 Py_DECREF(args);
5059                 args_owned = 0;
5060         }
5061         /* Fiddle args right (remove the first argidx arguments) */
5062         if (PyTuple_Check(orig_args) && argidx > 0) {
5063                 PyObject *v;
5064                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5065                 v = PyTuple_New(n);
5066                 if (v == NULL)
5067                         goto error;
5068                 while (--n >= 0) {
5069                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5070                         Py_INCREF(w);
5071                         PyTuple_SET_ITEM(v, n, w);
5072                 }
5073                 args = v;
5074         } else {
5075                 Py_INCREF(orig_args);
5076                 args = orig_args;
5077         }
5078         args_owned = 1;
5079         /* Take what we have of the result and let the Unicode formatting
5080            function format the rest of the input. */
5081         rescnt = res - PyString_AS_STRING(result);
5082         if (_PyString_Resize(&result, rescnt))
5083                 goto error;
5084         fmtcnt = PyString_GET_SIZE(format) - \
5085                  (fmt - PyString_AS_STRING(format));
5086         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5087         if (format == NULL)
5088                 goto error;
5089         v = PyUnicode_Format(format, args);
5090         Py_DECREF(format);
5091         if (v == NULL)
5092                 goto error;
5093         /* Paste what we have (result) to what the Unicode formatting
5094            function returned (v) and return the result (or error) */
5095         w = PyUnicode_Concat(result, v);
5096         Py_DECREF(result);
5097         Py_DECREF(v);
5098         Py_DECREF(args);
5099         return w;
5100 #endif /* Py_USING_UNICODE */
5101
5102  error:
5103         Py_DECREF(result);
5104         if (args_owned) {
5105                 Py_DECREF(args);
5106         }
5107         return NULL;
5108 }
5109
5110 void
5111 PyString_InternInPlace(PyObject **p)
5112 {
5113         register PyStringObject *s = (PyStringObject *)(*p);
5114         PyObject *t;
5115         if (s == NULL || !PyString_Check(s))
5116                 Py_FatalError("PyString_InternInPlace: strings only please!");
5117         /* If it's a string subclass, we don't really know what putting
5118            it in the interned dict might do. */
5119         if (!PyString_CheckExact(s))
5120                 return;
5121         if (PyString_CHECK_INTERNED(s))
5122                 return;
5123         if (interned == NULL) {
5124                 interned = PyDict_New();
5125                 if (interned == NULL) {
5126                         PyErr_Clear(); /* Don't leave an exception */
5127                         return;
5128                 }
5129         }
5130         t = PyDict_GetItem(interned, (PyObject *)s);
5131         if (t) {
5132                 Py_INCREF(t);
5133                 Py_DECREF(*p);
5134                 *p = t;
5135                 return;
5136         }
5137
5138         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5139                 PyErr_Clear();
5140                 return;
5141         }
5142         /* The two references in interned are not counted by refcnt.
5143            The string deallocator will take care of this */
5144         Py_REFCNT(s) -= 2;
5145         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5146 }
5147
5148 void
5149 PyString_InternImmortal(PyObject **p)
5150 {
5151         PyString_InternInPlace(p);
5152         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5153                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5154                 Py_INCREF(*p);
5155         }
5156 }
5157
5158
5159 PyObject *
5160 PyString_InternFromString(const char *cp)
5161 {
5162         PyObject *s = PyString_FromString(cp);
5163         if (s == NULL)
5164                 return NULL;
5165         PyString_InternInPlace(&s);
5166         return s;
5167 }
5168
5169 void
5170 PyString_Fini(void)
5171 {
5172         int i;
5173         for (i = 0; i < UCHAR_MAX + 1; i++) {
5174                 Py_XDECREF(characters[i]);
5175                 characters[i] = NULL;
5176         }
5177         Py_XDECREF(nullstring);
5178         nullstring = NULL;
5179 }
5180
5181 void _Py_ReleaseInternedStrings(void)
5182 {
5183         PyObject *keys;
5184         PyStringObject *s;
5185         Py_ssize_t i, n;
5186         Py_ssize_t immortal_size = 0, mortal_size = 0;
5187
5188         if (interned == NULL || !PyDict_Check(interned))
5189                 return;
5190         keys = PyDict_Keys(interned);
5191         if (keys == NULL || !PyList_Check(keys)) {
5192                 PyErr_Clear();
5193                 return;
5194         }
5195
5196         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5197            detector, interned strings are not forcibly deallocated; rather, we
5198            give them their stolen references back, and then clear and DECREF
5199            the interned dict. */
5200
5201         n = PyList_GET_SIZE(keys);
5202         fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5203                 n);
5204         for (i = 0; i < n; i++) {
5205                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5206                 switch (s->ob_sstate) {
5207                 case SSTATE_NOT_INTERNED:
5208                         /* XXX Shouldn't happen */
5209                         break;
5210                 case SSTATE_INTERNED_IMMORTAL:
5211                         Py_REFCNT(s) += 1;
5212                         immortal_size += Py_SIZE(s);
5213                         break;
5214                 case SSTATE_INTERNED_MORTAL:
5215                         Py_REFCNT(s) += 2;
5216                         mortal_size += Py_SIZE(s);
5217                         break;
5218                 default:
5219                         Py_FatalError("Inconsistent interned string state.");
5220                 }
5221                 s->ob_sstate = SSTATE_NOT_INTERNED;
5222         }
5223         fprintf(stderr, "total size of all interned strings: "
5224                         "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5225                         "mortal/immortal\n", mortal_size, immortal_size);
5226         Py_DECREF(keys);
5227         PyDict_Clear(interned);
5228         Py_DECREF(interned);
5229         interned = NULL;
5230 }