Objects/stringobject.c

   1 /* String object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6
   7 #include <ctype.h>
   8
   9 #ifdef COUNT_ALLOCS
  10 int null_strings, one_strings;
  11 #endif
  12
  13 static PyStringObject *characters[UCHAR_MAX + 1];
  14 static PyStringObject *nullstring;
  15
  16 /* This dictionary holds all interned strings.  Note that references to
  17    strings in this dictionary are *not* counted in the string's ob_refcnt.
  18    When the interned string reaches a refcnt of 0 the string deallocation
  19    function will delete the reference from this dictionary.
  20
  21    Another way to look at this is to say that the actual reference
  22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  23 */
  24 static PyObject *interned;
  25
  26 /*
  27    For both PyString_FromString() and PyString_FromStringAndSize(), the
  28    parameter `size' denotes number of characters to allocate, not counting any
  29    null terminating character.
  30
  31    For PyString_FromString(), the parameter `str' points to a null-terminated
  32    string containing exactly `size' bytes.
  33
  34    For PyString_FromStringAndSize(), the parameter `str' is
  35    either NULL or else points to a string containing at least `size' bytes.
  36    For PyString_FromStringAndSize(), the string in the `str' parameter does
  37    not have to be null-terminated.  (Therefore it is safe to construct a
  38    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  39    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  40    bytes (setting the last byte to the null terminating character) and you can
  41    fill in the data yourself.  If `str' is non-NULL then the resulting
  42    PyString object must be treated as immutable and you must not fill in nor
  43    alter the data yourself, since the strings may be shared.
  44
  45    The PyObject member `op->ob_size', which denotes the number of "extra
  46    items" in a variable-size object, will contain the number of bytes
  47    allocated for string data, not counting the null terminating character.  It
  48    is therefore equal to the `size' parameter (for
  49    PyString_FromStringAndSize()) or the length of the string in the `str'
  50    parameter (for PyString_FromString()).
  51 */
  52 PyObject *
  53 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  54 {
  55         register PyStringObject *op;
  56         assert(size >= 0);
  57         if (size == 0 && (op = nullstring) != NULL) {
  58 #ifdef COUNT_ALLOCS
  59                 null_strings++;
  60 #endif
  61                 Py_INCREF(op);
  62                 return (PyObject *)op;
  63         }
  64         if (size == 1 && str != NULL &&
  65             (op = characters[*str & UCHAR_MAX]) != NULL)
  66         {
  67 #ifdef COUNT_ALLOCS
  68                 one_strings++;
  69 #endif
  70                 Py_INCREF(op);
  71                 return (PyObject *)op;
  72         }
  73
  74         /* Inline PyObject_NewVar */
  75         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
  76         if (op == NULL)
  77                 return PyErr_NoMemory();
  78         PyObject_INIT_VAR(op, &PyString_Type, size);
  79         op->ob_shash = -1;
  80         op->ob_sstate = SSTATE_NOT_INTERNED;
  81         if (str != NULL)
  82                 Py_MEMCPY(op->ob_sval, str, size);
  83         op->ob_sval[size] = '\0';
  84         /* share short strings */
  85         if (size == 0) {
  86                 PyObject *t = (PyObject *)op;
  87                 PyString_InternInPlace(&t);
  88                 op = (PyStringObject *)t;
  89                 nullstring = op;
  90                 Py_INCREF(op);
  91         } else if (size == 1 && str != NULL) {
  92                 PyObject *t = (PyObject *)op;
  93                 PyString_InternInPlace(&t);
  94                 op = (PyStringObject *)t;
  95                 characters[*str & UCHAR_MAX] = op;
  96                 Py_INCREF(op);
  97         }
  98         return (PyObject *) op;
  99 }
 100
 101 PyObject *
 102 PyString_FromString(const char *str)
 103 {
 104         register size_t size;
 105         register PyStringObject *op;
 106
 107         assert(str != NULL);
 108         size = strlen(str);
 109         if (size > PY_SSIZE_T_MAX) {
 110                 PyErr_SetString(PyExc_OverflowError,
 111                         "string is too long for a Python string");
 112                 return NULL;
 113         }
 114         if (size == 0 && (op = nullstring) != NULL) {
 115 #ifdef COUNT_ALLOCS
 116                 null_strings++;
 117 #endif
 118                 Py_INCREF(op);
 119                 return (PyObject *)op;
 120         }
 121         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 122 #ifdef COUNT_ALLOCS
 123                 one_strings++;
 124 #endif
 125                 Py_INCREF(op);
 126                 return (PyObject *)op;
 127         }
 128
 129         /* Inline PyObject_NewVar */
 130         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 131         if (op == NULL)
 132                 return PyErr_NoMemory();
 133         PyObject_INIT_VAR(op, &PyString_Type, size);
 134         op->ob_shash = -1;
 135         op->ob_sstate = SSTATE_NOT_INTERNED;
 136         Py_MEMCPY(op->ob_sval, str, size+1);
 137         /* share short strings */
 138         if (size == 0) {
 139                 PyObject *t = (PyObject *)op;
 140                 PyString_InternInPlace(&t);
 141                 op = (PyStringObject *)t;
 142                 nullstring = op;
 143                 Py_INCREF(op);
 144         } else if (size == 1) {
 145                 PyObject *t = (PyObject *)op;
 146                 PyString_InternInPlace(&t);
 147                 op = (PyStringObject *)t;
 148                 characters[*str & UCHAR_MAX] = op;
 149                 Py_INCREF(op);
 150         }
 151         return (PyObject *) op;
 152 }
 153
 154 PyObject *
 155 PyString_FromFormatV(const char *format, va_list vargs)
 156 {
 157         va_list count;
 158         Py_ssize_t n = 0;
 159         const char* f;
 160         char *s;
 161         PyObject* string;
 162
 163 #ifdef VA_LIST_IS_ARRAY
 164         Py_MEMCPY(count, vargs, sizeof(va_list));
 165 #else
 166 #ifdef  __va_copy
 167         __va_copy(count, vargs);
 168 #else
 169         count = vargs;
 170 #endif
 171 #endif
 172         /* step 1: figure out how large a buffer we need */
 173         for (f = format; *f; f++) {
 174                 if (*f == '%') {
 175                         const char* p = f;
 176                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 177                                 ;
 178
 179                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 180                          * they don't affect the amount of space we reserve.
 181                          */
 182                         if ((*f == 'l' || *f == 'z') &&
 183                                         (f[1] == 'd' || f[1] == 'u'))
 184                                 ++f;
 185
 186                         switch (*f) {
 187                         case 'c':
 188                                 (void)va_arg(count, int);
 189                                 /* fall through... */
 190                         case '%':
 191                                 n++;
 192                                 break;
 193                         case 'd': case 'u': case 'i': case 'x':
 194                                 (void) va_arg(count, int);
 195                                 /* 20 bytes is enough to hold a 64-bit
 196                                    integer.  Decimal takes the most space.
 197                                    This isn't enough for octal. */
 198                                 n += 20;
 199                                 break;
 200                         case 's':
 201                                 s = va_arg(count, char*);
 202                                 n += strlen(s);
 203                                 break;
 204                         case 'p':
 205                                 (void) va_arg(count, int);
 206                                 /* maximum 64-bit pointer representation:
 207                                  * 0xffffffffffffffff
 208                                  * so 19 characters is enough.
 209                                  * XXX I count 18 -- what's the extra for?
 210                                  */
 211                                 n += 19;
 212                                 break;
 213                         default:
 214                                 /* if we stumble upon an unknown
 215                                    formatting code, copy the rest of
 216                                    the format string to the output
 217                                    string. (we cannot just skip the
 218                                    code, since there's no way to know
 219                                    what's in the argument list) */
 220                                 n += strlen(p);
 221                                 goto expand;
 222                         }
 223                 } else
 224                         n++;
 225         }
 226  expand:
 227         /* step 2: fill the buffer */
 228         /* Since we've analyzed how much space we need for the worst case,
 229            use sprintf directly instead of the slower PyOS_snprintf. */
 230         string = PyString_FromStringAndSize(NULL, n);
 231         if (!string)
 232                 return NULL;
 233
 234         s = PyString_AsString(string);
 235
 236         for (f = format; *f; f++) {
 237                 if (*f == '%') {
 238                         const char* p = f++;
 239                         Py_ssize_t i;
 240                         int longflag = 0;
 241                         int size_tflag = 0;
 242                         /* parse the width.precision part (we're only
 243                            interested in the precision value, if any) */
 244                         n = 0;
 245                         while (isdigit(Py_CHARMASK(*f)))
 246                                 n = (n*10) + *f++ - '0';
 247                         if (*f == '.') {
 248                                 f++;
 249                                 n = 0;
 250                                 while (isdigit(Py_CHARMASK(*f)))
 251                                         n = (n*10) + *f++ - '0';
 252                         }
 253                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 254                                 f++;
 255                         /* handle the long flag, but only for %ld and %lu.
 256                            others can be added when necessary. */
 257                         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
 258                                 longflag = 1;
 259                                 ++f;
 260                         }
 261                         /* handle the size_t flag. */
 262                         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 263                                 size_tflag = 1;
 264                                 ++f;
 265                         }
 266
 267                         switch (*f) {
 268                         case 'c':
 269                                 *s++ = va_arg(vargs, int);
 270                                 break;
 271                         case 'd':
 272                                 if (longflag)
 273                                         sprintf(s, "%ld", va_arg(vargs, long));
 274                                 else if (size_tflag)
 275                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 276                                                 va_arg(vargs, Py_ssize_t));
 277                                 else
 278                                         sprintf(s, "%d", va_arg(vargs, int));
 279                                 s += strlen(s);
 280                                 break;
 281                         case 'u':
 282                                 if (longflag)
 283                                         sprintf(s, "%lu",
 284                                                 va_arg(vargs, unsigned long));
 285                                 else if (size_tflag)
 286                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 287                                                 va_arg(vargs, size_t));
 288                                 else
 289                                         sprintf(s, "%u",
 290                                                 va_arg(vargs, unsigned int));
 291                                 s += strlen(s);
 292                                 break;
 293                         case 'i':
 294                                 sprintf(s, "%i", va_arg(vargs, int));
 295                                 s += strlen(s);
 296                                 break;
 297                         case 'x':
 298                                 sprintf(s, "%x", va_arg(vargs, int));
 299                                 s += strlen(s);
 300                                 break;
 301                         case 's':
 302                                 p = va_arg(vargs, char*);
 303                                 i = strlen(p);
 304                                 if (n > 0 && i > n)
 305                                         i = n;
 306                                 Py_MEMCPY(s, p, i);
 307                                 s += i;
 308                                 break;
 309                         case 'p':
 310                                 sprintf(s, "%p", va_arg(vargs, void*));
 311                                 /* %p is ill-defined:  ensure leading 0x. */
 312                                 if (s[1] == 'X')
 313                                         s[1] = 'x';
 314                                 else if (s[1] != 'x') {
 315                                         memmove(s+2, s, strlen(s)+1);
 316                                         s[0] = '0';
 317                                         s[1] = 'x';
 318                                 }
 319                                 s += strlen(s);
 320                                 break;
 321                         case '%':
 322                                 *s++ = '%';
 323                                 break;
 324                         default:
 325                                 strcpy(s, p);
 326                                 s += strlen(s);
 327                                 goto end;
 328                         }
 329                 } else
 330                         *s++ = *f;
 331         }
 332
 333  end:
 334         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 335         return string;
 336 }
 337
 338 PyObject *
 339 PyString_FromFormat(const char *format, ...)
 340 {
 341         PyObject* ret;
 342         va_list vargs;
 343
 344 #ifdef HAVE_STDARG_PROTOTYPES
 345         va_start(vargs, format);
 346 #else
 347         va_start(vargs);
 348 #endif
 349         ret = PyString_FromFormatV(format, vargs);
 350         va_end(vargs);
 351         return ret;
 352 }
 353
 354
 355 PyObject *PyString_Decode(const char *s,
 356                           Py_ssize_t size,
 357                           const char *encoding,
 358                           const char *errors)
 359 {
 360     PyObject *v, *str;
 361
 362     str = PyString_FromStringAndSize(s, size);
 363     if (str == NULL)
 364         return NULL;
 365     v = PyString_AsDecodedString(str, encoding, errors);
 366     Py_DECREF(str);
 367     return v;
 368 }
 369
 370 PyObject *PyString_AsDecodedObject(PyObject *str,
 371                                    const char *encoding,
 372                                    const char *errors)
 373 {
 374     PyObject *v;
 375
 376     if (!PyString_Check(str)) {
 377         PyErr_BadArgument();
 378         goto onError;
 379     }
 380
 381     if (encoding == NULL) {
 382 #ifdef Py_USING_UNICODE
 383         encoding = PyUnicode_GetDefaultEncoding();
 384 #else
 385         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 386         goto onError;
 387 #endif
 388     }
 389
 390     /* Decode via the codec registry */
 391     v = PyCodec_Decode(str, encoding, errors);
 392     if (v == NULL)
 393         goto onError;
 394
 395     return v;
 396
 397  onError:
 398     return NULL;
 399 }
 400
 401 PyObject *PyString_AsDecodedString(PyObject *str,
 402                                    const char *encoding,
 403                                    const char *errors)
 404 {
 405     PyObject *v;
 406
 407     v = PyString_AsDecodedObject(str, encoding, errors);
 408     if (v == NULL)
 409         goto onError;
 410
 411 #ifdef Py_USING_UNICODE
 412     /* Convert Unicode to a string using the default encoding */
 413     if (PyUnicode_Check(v)) {
 414         PyObject *temp = v;
 415         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 416         Py_DECREF(temp);
 417         if (v == NULL)
 418             goto onError;
 419     }
 420 #endif
 421     if (!PyString_Check(v)) {
 422         PyErr_Format(PyExc_TypeError,
 423                      "decoder did not return a string object (type=%.400s)",
 424                      Py_Type(v)->tp_name);
 425         Py_DECREF(v);
 426         goto onError;
 427     }
 428
 429     return v;
 430
 431  onError:
 432     return NULL;
 433 }
 434
 435 PyObject *PyString_Encode(const char *s,
 436                           Py_ssize_t size,
 437                           const char *encoding,
 438                           const char *errors)
 439 {
 440     PyObject *v, *str;
 441
 442     str = PyString_FromStringAndSize(s, size);
 443     if (str == NULL)
 444         return NULL;
 445     v = PyString_AsEncodedString(str, encoding, errors);
 446     Py_DECREF(str);
 447     return v;
 448 }
 449
 450 PyObject *PyString_AsEncodedObject(PyObject *str,
 451                                    const char *encoding,
 452                                    const char *errors)
 453 {
 454     PyObject *v;
 455
 456     if (!PyString_Check(str)) {
 457         PyErr_BadArgument();
 458         goto onError;
 459     }
 460
 461     if (encoding == NULL) {
 462 #ifdef Py_USING_UNICODE
 463         encoding = PyUnicode_GetDefaultEncoding();
 464 #else
 465         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 466         goto onError;
 467 #endif
 468     }
 469
 470     /* Encode via the codec registry */
 471     v = PyCodec_Encode(str, encoding, errors);
 472     if (v == NULL)
 473         goto onError;
 474
 475     return v;
 476
 477  onError:
 478     return NULL;
 479 }
 480
 481 PyObject *PyString_AsEncodedString(PyObject *str,
 482                                    const char *encoding,
 483                                    const char *errors)
 484 {
 485     PyObject *v;
 486
 487     v = PyString_AsEncodedObject(str, encoding, errors);
 488     if (v == NULL)
 489         goto onError;
 490
 491 #ifdef Py_USING_UNICODE
 492     /* Convert Unicode to a string using the default encoding */
 493     if (PyUnicode_Check(v)) {
 494         PyObject *temp = v;
 495         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 496         Py_DECREF(temp);
 497         if (v == NULL)
 498             goto onError;
 499     }
 500 #endif
 501     if (!PyString_Check(v)) {
 502         PyErr_Format(PyExc_TypeError,
 503                      "encoder did not return a string object (type=%.400s)",
 504                      Py_Type(v)->tp_name);
 505         Py_DECREF(v);
 506         goto onError;
 507     }
 508
 509     return v;
 510
 511  onError:
 512     return NULL;
 513 }
 514
 515 static void
 516 string_dealloc(PyObject *op)
 517 {
 518         switch (PyString_CHECK_INTERNED(op)) {
 519                 case SSTATE_NOT_INTERNED:
 520                         break;
 521
 522                 case SSTATE_INTERNED_MORTAL:
 523                         /* revive dead object temporarily for DelItem */
 524                         Py_Refcnt(op) = 3;
 525                         if (PyDict_DelItem(interned, op) != 0)
 526                                 Py_FatalError(
 527                                         "deletion of interned string failed");
 528                         break;
 529
 530                 case SSTATE_INTERNED_IMMORTAL:
 531                         Py_FatalError("Immortal interned string died.");
 532
 533                 default:
 534                         Py_FatalError("Inconsistent interned string state.");
 535         }
 536         Py_Type(op)->tp_free(op);
 537 }
 538
 539 /* Unescape a backslash-escaped string. If unicode is non-zero,
 540    the string is a u-literal. If recode_encoding is non-zero,
 541    the string is UTF-8 encoded and should be re-encoded in the
 542    specified encoding.  */
 543
 544 PyObject *PyString_DecodeEscape(const char *s,
 545                                 Py_ssize_t len,
 546                                 const char *errors,
 547                                 Py_ssize_t unicode,
 548                                 const char *recode_encoding)
 549 {
 550         int c;
 551         char *p, *buf;
 552         const char *end;
 553         PyObject *v;
 554         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 555         v = PyString_FromStringAndSize((char *)NULL, newlen);
 556         if (v == NULL)
 557                 return NULL;
 558         p = buf = PyString_AsString(v);
 559         end = s + len;
 560         while (s < end) {
 561                 if (*s != '\\') {
 562                   non_esc:
 563 #ifdef Py_USING_UNICODE
 564                         if (recode_encoding && (*s & 0x80)) {
 565                                 PyObject *u, *w;
 566                                 char *r;
 567                                 const char* t;
 568                                 Py_ssize_t rn;
 569                                 t = s;
 570                                 /* Decode non-ASCII bytes as UTF-8. */
 571                                 while (t < end && (*t & 0x80)) t++;
 572                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 573                                 if(!u) goto failed;
 574
 575                                 /* Recode them in target encoding. */
 576                                 w = PyUnicode_AsEncodedString(
 577                                         u, recode_encoding, errors);
 578                                 Py_DECREF(u);
 579                                 if (!w) goto failed;
 580
 581                                 /* Append bytes to output buffer. */
 582                                 assert(PyString_Check(w));
 583                                 r = PyString_AS_STRING(w);
 584                                 rn = PyString_GET_SIZE(w);
 585                                 Py_MEMCPY(p, r, rn);
 586                                 p += rn;
 587                                 Py_DECREF(w);
 588                                 s = t;
 589                         } else {
 590                                 *p++ = *s++;
 591                         }
 592 #else
 593                         *p++ = *s++;
 594 #endif
 595                         continue;
 596                 }
 597                 s++;
 598                 if (s==end) {
 599                         PyErr_SetString(PyExc_ValueError,
 600                                         "Trailing \\ in string");
 601                         goto failed;
 602                 }
 603                 switch (*s++) {
 604                 /* XXX This assumes ASCII! */
 605                 case '\n': break;
 606                 case '\\': *p++ = '\\'; break;
 607                 case '\'': *p++ = '\''; break;
 608                 case '\"': *p++ = '\"'; break;
 609                 case 'b': *p++ = '\b'; break;
 610                 case 'f': *p++ = '\014'; break; /* FF */
 611                 case 't': *p++ = '\t'; break;
 612                 case 'n': *p++ = '\n'; break;
 613                 case 'r': *p++ = '\r'; break;
 614                 case 'v': *p++ = '\013'; break; /* VT */
 615                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 616                 case '0': case '1': case '2': case '3':
 617                 case '4': case '5': case '6': case '7':
 618                         c = s[-1] - '0';
 619                         if ('0' <= *s && *s <= '7') {
 620                                 c = (c<<3) + *s++ - '0';
 621                                 if ('0' <= *s && *s <= '7')
 622                                         c = (c<<3) + *s++ - '0';
 623                         }
 624                         *p++ = c;
 625                         break;
 626                 case 'x':
 627                         if (isxdigit(Py_CHARMASK(s[0]))
 628                             && isxdigit(Py_CHARMASK(s[1]))) {
 629                                 unsigned int x = 0;
 630                                 c = Py_CHARMASK(*s);
 631                                 s++;
 632                                 if (isdigit(c))
 633                                         x = c - '0';
 634                                 else if (islower(c))
 635                                         x = 10 + c - 'a';
 636                                 else
 637                                         x = 10 + c - 'A';
 638                                 x = x << 4;
 639                                 c = Py_CHARMASK(*s);
 640                                 s++;
 641                                 if (isdigit(c))
 642                                         x += c - '0';
 643                                 else if (islower(c))
 644                                         x += 10 + c - 'a';
 645                                 else
 646                                         x += 10 + c - 'A';
 647                                 *p++ = x;
 648                                 break;
 649                         }
 650                         if (!errors || strcmp(errors, "strict") == 0) {
 651                                 PyErr_SetString(PyExc_ValueError,
 652                                                 "invalid \\x escape");
 653                                 goto failed;
 654                         }
 655                         if (strcmp(errors, "replace") == 0) {
 656                                 *p++ = '?';
 657                         } else if (strcmp(errors, "ignore") == 0)
 658                                 /* do nothing */;
 659                         else {
 660                                 PyErr_Format(PyExc_ValueError,
 661                                              "decoding error; "
 662                                              "unknown error handling code: %.400s",
 663                                              errors);
 664                                 goto failed;
 665                         }
 666 #ifndef Py_USING_UNICODE
 667                 case 'u':
 668                 case 'U':
 669                 case 'N':
 670                         if (unicode) {
 671                                 PyErr_SetString(PyExc_ValueError,
 672                                           "Unicode escapes not legal "
 673                                           "when Unicode disabled");
 674                                 goto failed;
 675                         }
 676 #endif
 677                 default:
 678                         *p++ = '\\';
 679                         s--;
 680                         goto non_esc; /* an arbitry number of unescaped
 681                                          UTF-8 bytes may follow. */
 682                 }
 683         }
 684         if (p-buf < newlen)
 685                 _PyString_Resize(&v, p - buf);
 686         return v;
 687   failed:
 688         Py_DECREF(v);
 689         return NULL;
 690 }
 691
 692 /* -------------------------------------------------------------------- */
 693 /* object api */
 694
 695 static Py_ssize_t
 696 string_getsize(register PyObject *op)
 697 {
 698         char *s;
 699         Py_ssize_t len;
 700         if (PyString_AsStringAndSize(op, &s, &len))
 701                 return -1;
 702         return len;
 703 }
 704
 705 static /*const*/ char *
 706 string_getbuffer(register PyObject *op)
 707 {
 708         char *s;
 709         Py_ssize_t len;
 710         if (PyString_AsStringAndSize(op, &s, &len))
 711                 return NULL;
 712         return s;
 713 }
 714
 715 Py_ssize_t
 716 PyString_Size(register PyObject *op)
 717 {
 718         if (!PyString_Check(op))
 719                 return string_getsize(op);
 720         return Py_Size(op);
 721 }
 722
 723 /*const*/ char *
 724 PyString_AsString(register PyObject *op)
 725 {
 726         if (!PyString_Check(op))
 727                 return string_getbuffer(op);
 728         return ((PyStringObject *)op) -> ob_sval;
 729 }
 730
 731 int
 732 PyString_AsStringAndSize(register PyObject *obj,
 733                          register char **s,
 734                          register Py_ssize_t *len)
 735 {
 736         if (s == NULL) {
 737                 PyErr_BadInternalCall();
 738                 return -1;
 739         }
 740
 741         if (!PyString_Check(obj)) {
 742 #ifdef Py_USING_UNICODE
 743                 if (PyUnicode_Check(obj)) {
 744                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 745                         if (obj == NULL)
 746                                 return -1;
 747                 }
 748                 else
 749 #endif
 750                 {
 751                         PyErr_Format(PyExc_TypeError,
 752                                      "expected string or Unicode object, "
 753                                      "%.200s found", Py_Type(obj)->tp_name);
 754                         return -1;
 755                 }
 756         }
 757
 758         *s = PyString_AS_STRING(obj);
 759         if (len != NULL)
 760                 *len = PyString_GET_SIZE(obj);
 761         else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 762                 PyErr_SetString(PyExc_TypeError,
 763                                 "expected string without null bytes");
 764                 return -1;
 765         }
 766         return 0;
 767 }
 768
 769 /* -------------------------------------------------------------------- */
 770 /* Methods */
 771
 772 #define STRINGLIB_CHAR char
 773
 774 #define STRINGLIB_CMP memcmp
 775 #define STRINGLIB_LEN PyString_GET_SIZE
 776 #define STRINGLIB_NEW PyString_FromStringAndSize
 777 #define STRINGLIB_STR PyString_AS_STRING
 778
 779 #define STRINGLIB_EMPTY nullstring
 780
 781 #include "stringlib/fastsearch.h"
 782
 783 #include "stringlib/count.h"
 784 #include "stringlib/find.h"
 785 #include "stringlib/partition.h"
 786
 787
 788 static int
 789 string_print(PyStringObject *op, FILE *fp, int flags)
 790 {
 791         Py_ssize_t i, str_len;
 792         char c;
 793         int quote;
 794
 795         /* XXX Ought to check for interrupts when writing long strings */
 796         if (! PyString_CheckExact(op)) {
 797                 int ret;
 798                 /* A str subclass may have its own __str__ method. */
 799                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 800                 if (op == NULL)
 801                         return -1;
 802                 ret = string_print(op, fp, flags);
 803                 Py_DECREF(op);
 804                 return ret;
 805         }
 806         if (flags & Py_PRINT_RAW) {
 807                 char *data = op->ob_sval;
 808                 Py_ssize_t size = Py_Size(op);
 809                 Py_BEGIN_ALLOW_THREADS
 810                 while (size > INT_MAX) {
 811                         /* Very long strings cannot be written atomically.
 812                          * But don't write exactly INT_MAX bytes at a time
 813                          * to avoid memory aligment issues.
 814                          */
 815                         const int chunk_size = INT_MAX & ~0x3FFF;
 816                         fwrite(data, 1, chunk_size, fp);
 817                         data += chunk_size;
 818                         size -= chunk_size;
 819                 }
 820 #ifdef __VMS
 821                 if (size) fwrite(data, (int)size, 1, fp);
 822 #else
 823                 fwrite(data, 1, (int)size, fp);
 824 #endif
 825                 Py_END_ALLOW_THREADS
 826                 return 0;
 827         }
 828
 829         /* figure out which quote to use; single is preferred */
 830         quote = '\'';
 831         if (memchr(op->ob_sval, '\'', Py_Size(op)) &&
 832             !memchr(op->ob_sval, '"', Py_Size(op)))
 833                 quote = '"';
 834
 835         str_len = Py_Size(op);
 836         Py_BEGIN_ALLOW_THREADS
 837         fputc(quote, fp);
 838         for (i = 0; i < str_len; i++) {
 839                 /* Since strings are immutable and the caller should have a
 840                 reference, accessing the interal buffer should not be an issue
 841                 with the GIL released. */
 842                 c = op->ob_sval[i];
 843                 if (c == quote || c == '\\')
 844                         fprintf(fp, "\\%c", c);
 845                 else if (c == '\t')
 846                         fprintf(fp, "\\t");
 847                 else if (c == '\n')
 848                         fprintf(fp, "\\n");
 849                 else if (c == '\r')
 850                         fprintf(fp, "\\r");
 851                 else if (c < ' ' || c >= 0x7f)
 852                         fprintf(fp, "\\x%02x", c & 0xff);
 853                 else
 854                         fputc(c, fp);
 855         }
 856         fputc(quote, fp);
 857         Py_END_ALLOW_THREADS
 858         return 0;
 859 }
 860
 861 PyObject *
 862 PyString_Repr(PyObject *obj, int smartquotes)
 863 {
 864         register PyStringObject* op = (PyStringObject*) obj;
 865         size_t newsize = 2 + 4 * Py_Size(op);
 866         PyObject *v;
 867         if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
 868                 PyErr_SetString(PyExc_OverflowError,
 869                         "string is too large to make repr");
 870         }
 871         v = PyString_FromStringAndSize((char *)NULL, newsize);
 872         if (v == NULL) {
 873                 return NULL;
 874         }
 875         else {
 876                 register Py_ssize_t i;
 877                 register char c;
 878                 register char *p;
 879                 int quote;
 880
 881                 /* figure out which quote to use; single is preferred */
 882                 quote = '\'';
 883                 if (smartquotes &&
 884                     memchr(op->ob_sval, '\'', Py_Size(op)) &&
 885                     !memchr(op->ob_sval, '"', Py_Size(op)))
 886                         quote = '"';
 887
 888                 p = PyString_AS_STRING(v);
 889                 *p++ = quote;
 890                 for (i = 0; i < Py_Size(op); i++) {
 891                         /* There's at least enough room for a hex escape
 892                            and a closing quote. */
 893                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 894                         c = op->ob_sval[i];
 895                         if (c == quote || c == '\\')
 896                                 *p++ = '\\', *p++ = c;
 897                         else if (c == '\t')
 898                                 *p++ = '\\', *p++ = 't';
 899                         else if (c == '\n')
 900                                 *p++ = '\\', *p++ = 'n';
 901                         else if (c == '\r')
 902                                 *p++ = '\\', *p++ = 'r';
 903                         else if (c < ' ' || c >= 0x7f) {
 904                                 /* For performance, we don't want to call
 905                                    PyOS_snprintf here (extra layers of
 906                                    function call). */
 907                                 sprintf(p, "\\x%02x", c & 0xff);
 908                                 p += 4;
 909                         }
 910                         else
 911                                 *p++ = c;
 912                 }
 913                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 914                 *p++ = quote;
 915                 *p = '\0';
 916                 _PyString_Resize(
 917                         &v, (p - PyString_AS_STRING(v)));
 918                 return v;
 919         }
 920 }
 921
 922 static PyObject *
 923 string_repr(PyObject *op)
 924 {
 925         return PyString_Repr(op, 1);
 926 }
 927
 928 static PyObject *
 929 string_str(PyObject *s)
 930 {
 931         assert(PyString_Check(s));
 932         if (PyString_CheckExact(s)) {
 933                 Py_INCREF(s);
 934                 return s;
 935         }
 936         else {
 937                 /* Subtype -- return genuine string with the same value. */
 938                 PyStringObject *t = (PyStringObject *) s;
 939                 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
 940         }
 941 }
 942
 943 static Py_ssize_t
 944 string_length(PyStringObject *a)
 945 {
 946         return Py_Size(a);
 947 }
 948
 949 static PyObject *
 950 string_concat(register PyStringObject *a, register PyObject *bb)
 951 {
 952         register Py_ssize_t size;
 953         register PyStringObject *op;
 954         if (!PyString_Check(bb)) {
 955 #ifdef Py_USING_UNICODE
 956                 if (PyUnicode_Check(bb))
 957                     return PyUnicode_Concat((PyObject *)a, bb);
 958 #endif
 959                 PyErr_Format(PyExc_TypeError,
 960                              "cannot concatenate 'str' and '%.200s' objects",
 961                              Py_Type(bb)->tp_name);
 962                 return NULL;
 963         }
 964 #define b ((PyStringObject *)bb)
 965         /* Optimize cases with empty left or right operand */
 966         if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
 967             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 968                 if (Py_Size(a) == 0) {
 969                         Py_INCREF(bb);
 970                         return bb;
 971                 }
 972                 Py_INCREF(a);
 973                 return (PyObject *)a;
 974         }
 975         size = Py_Size(a) + Py_Size(b);
 976         if (size < 0) {
 977                 PyErr_SetString(PyExc_OverflowError,
 978                                 "strings are too large to concat");
 979                 return NULL;
 980         }
 981
 982         /* Inline PyObject_NewVar */
 983         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 984         if (op == NULL)
 985                 return PyErr_NoMemory();
 986         PyObject_INIT_VAR(op, &PyString_Type, size);
 987         op->ob_shash = -1;
 988         op->ob_sstate = SSTATE_NOT_INTERNED;
 989         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
 990         Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
 991         op->ob_sval[size] = '\0';
 992         return (PyObject *) op;
 993 #undef b
 994 }
 995
 996 static PyObject *
 997 string_repeat(register PyStringObject *a, register Py_ssize_t n)
 998 {
 999         register Py_ssize_t i;
1000         register Py_ssize_t j;
1001         register Py_ssize_t size;
1002         register PyStringObject *op;
1003         size_t nbytes;
1004         if (n < 0)
1005                 n = 0;
1006         /* watch out for overflows:  the size can overflow int,
1007          * and the # of bytes needed can overflow size_t
1008          */
1009         size = Py_Size(a) * n;
1010         if (n && size / n != Py_Size(a)) {
1011                 PyErr_SetString(PyExc_OverflowError,
1012                         "repeated string is too long");
1013                 return NULL;
1014         }
1015         if (size == Py_Size(a) && PyString_CheckExact(a)) {
1016                 Py_INCREF(a);
1017                 return (PyObject *)a;
1018         }
1019         nbytes = (size_t)size;
1020         if (nbytes + sizeof(PyStringObject) <= nbytes) {
1021                 PyErr_SetString(PyExc_OverflowError,
1022                         "repeated string is too long");
1023                 return NULL;
1024         }
1025         op = (PyStringObject *)
1026                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1027         if (op == NULL)
1028                 return PyErr_NoMemory();
1029         PyObject_INIT_VAR(op, &PyString_Type, size);
1030         op->ob_shash = -1;
1031         op->ob_sstate = SSTATE_NOT_INTERNED;
1032         op->ob_sval[size] = '\0';
1033         if (Py_Size(a) == 1 && n > 0) {
1034                 memset(op->ob_sval, a->ob_sval[0] , n);
1035                 return (PyObject *) op;
1036         }
1037         i = 0;
1038         if (i < size) {
1039                 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1040                 i = Py_Size(a);
1041         }
1042         while (i < size) {
1043                 j = (i <= size-i)  ?  i  :  size-i;
1044                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1045                 i += j;
1046         }
1047         return (PyObject *) op;
1048 }
1049
1050 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1051
1052 static PyObject *
1053 string_slice(register PyStringObject *a, register Py_ssize_t i,
1054              register Py_ssize_t j)
1055      /* j -- may be negative! */
1056 {
1057         if (i < 0)
1058                 i = 0;
1059         if (j < 0)
1060                 j = 0; /* Avoid signed/unsigned bug in next line */
1061         if (j > Py_Size(a))
1062                 j = Py_Size(a);
1063         if (i == 0 && j == Py_Size(a) && PyString_CheckExact(a)) {
1064                 /* It's the same as a */
1065                 Py_INCREF(a);
1066                 return (PyObject *)a;
1067         }
1068         if (j < i)
1069                 j = i;
1070         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1071 }
1072
1073 static int
1074 string_contains(PyObject *str_obj, PyObject *sub_obj)
1075 {
1076         if (!PyString_CheckExact(sub_obj)) {
1077 #ifdef Py_USING_UNICODE
1078                 if (PyUnicode_Check(sub_obj))
1079                         return PyUnicode_Contains(str_obj, sub_obj);
1080 #endif
1081                 if (!PyString_Check(sub_obj)) {
1082                         PyErr_Format(PyExc_TypeError,
1083                             "'in <string>' requires string as left operand, "
1084                             "not %.200s", Py_Type(sub_obj)->tp_name);
1085                         return -1;
1086                 }
1087         }
1088
1089         return stringlib_contains_obj(str_obj, sub_obj);
1090 }
1091
1092 static PyObject *
1093 string_item(PyStringObject *a, register Py_ssize_t i)
1094 {
1095         char pchar;
1096         PyObject *v;
1097         if (i < 0 || i >= Py_Size(a)) {
1098                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1099                 return NULL;
1100         }
1101         pchar = a->ob_sval[i];
1102         v = (PyObject *)characters[pchar & UCHAR_MAX];
1103         if (v == NULL)
1104                 v = PyString_FromStringAndSize(&pchar, 1);
1105         else {
1106 #ifdef COUNT_ALLOCS
1107                 one_strings++;
1108 #endif
1109                 Py_INCREF(v);
1110         }
1111         return v;
1112 }
1113
1114 static PyObject*
1115 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1116 {
1117         int c;
1118         Py_ssize_t len_a, len_b;
1119         Py_ssize_t min_len;
1120         PyObject *result;
1121
1122         /* Make sure both arguments are strings. */
1123         if (!(PyString_Check(a) && PyString_Check(b))) {
1124                 result = Py_NotImplemented;
1125                 goto out;
1126         }
1127         if (a == b) {
1128                 switch (op) {
1129                 case Py_EQ:case Py_LE:case Py_GE:
1130                         result = Py_True;
1131                         goto out;
1132                 case Py_NE:case Py_LT:case Py_GT:
1133                         result = Py_False;
1134                         goto out;
1135                 }
1136         }
1137         if (op == Py_EQ) {
1138                 /* Supporting Py_NE here as well does not save
1139                    much time, since Py_NE is rarely used.  */
1140                 if (Py_Size(a) == Py_Size(b)
1141                     && (a->ob_sval[0] == b->ob_sval[0]
1142                         && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
1143                         result = Py_True;
1144                 } else {
1145                         result = Py_False;
1146                 }
1147                 goto out;
1148         }
1149         len_a = Py_Size(a); len_b = Py_Size(b);
1150         min_len = (len_a < len_b) ? len_a : len_b;
1151         if (min_len > 0) {
1152                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1153                 if (c==0)
1154                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1155         } else
1156                 c = 0;
1157         if (c == 0)
1158                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1159         switch (op) {
1160         case Py_LT: c = c <  0; break;
1161         case Py_LE: c = c <= 0; break;
1162         case Py_EQ: assert(0);  break; /* unreachable */
1163         case Py_NE: c = c != 0; break;
1164         case Py_GT: c = c >  0; break;
1165         case Py_GE: c = c >= 0; break;
1166         default:
1167                 result = Py_NotImplemented;
1168                 goto out;
1169         }
1170         result = c ? Py_True : Py_False;
1171   out:
1172         Py_INCREF(result);
1173         return result;
1174 }
1175
1176 int
1177 _PyString_Eq(PyObject *o1, PyObject *o2)
1178 {
1179         PyStringObject *a = (PyStringObject*) o1;
1180         PyStringObject *b = (PyStringObject*) o2;
1181         return Py_Size(a) == Py_Size(b)
1182           && *a->ob_sval == *b->ob_sval
1183           && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
1184 }
1185
1186 static long
1187 string_hash(PyStringObject *a)
1188 {
1189         register Py_ssize_t len;
1190         register unsigned char *p;
1191         register long x;
1192
1193         if (a->ob_shash != -1)
1194                 return a->ob_shash;
1195         len = Py_Size(a);
1196         p = (unsigned char *) a->ob_sval;
1197         x = *p << 7;
1198         while (--len >= 0)
1199                 x = (1000003*x) ^ *p++;
1200         x ^= Py_Size(a);
1201         if (x == -1)
1202                 x = -2;
1203         a->ob_shash = x;
1204         return x;
1205 }
1206
1207 static PyObject*
1208 string_subscript(PyStringObject* self, PyObject* item)
1209 {
1210         if (PyIndex_Check(item)) {
1211                 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1212                 if (i == -1 && PyErr_Occurred())
1213                         return NULL;
1214                 if (i < 0)
1215                         i += PyString_GET_SIZE(self);
1216                 return string_item(self, i);
1217         }
1218         else if (PySlice_Check(item)) {
1219                 Py_ssize_t start, stop, step, slicelength, cur, i;
1220                 char* source_buf;
1221                 char* result_buf;
1222                 PyObject* result;
1223
1224                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1225                                  PyString_GET_SIZE(self),
1226                                  &start, &stop, &step, &slicelength) < 0) {
1227                         return NULL;
1228                 }
1229
1230                 if (slicelength <= 0) {
1231                         return PyString_FromStringAndSize("", 0);
1232                 }
1233                 else if (start == 0 && step == 1 &&
1234                          slicelength == PyString_GET_SIZE(self) &&
1235                          PyString_CheckExact(self)) {
1236                         Py_INCREF(self);
1237                         return (PyObject *)self;
1238                 }
1239                 else if (step == 1) {
1240                         return PyString_FromStringAndSize(
1241                                 PyString_AS_STRING(self) + start,
1242                                 slicelength);
1243                 }
1244                 else {
1245                         source_buf = PyString_AsString((PyObject*)self);
1246                         result_buf = (char *)PyMem_Malloc(slicelength);
1247                         if (result_buf == NULL)
1248                                 return PyErr_NoMemory();
1249
1250                         for (cur = start, i = 0; i < slicelength;
1251                              cur += step, i++) {
1252                                 result_buf[i] = source_buf[cur];
1253                         }
1254
1255                         result = PyString_FromStringAndSize(result_buf,
1256                                                             slicelength);
1257                         PyMem_Free(result_buf);
1258                         return result;
1259                 }
1260         }
1261         else {
1262                 PyErr_Format(PyExc_TypeError,
1263                              "string indices must be integers, not %.200s",
1264                              Py_Type(item)->tp_name);
1265                 return NULL;
1266         }
1267 }
1268
1269 static Py_ssize_t
1270 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1271 {
1272         if ( index != 0 ) {
1273                 PyErr_SetString(PyExc_SystemError,
1274                                 "accessing non-existent string segment");
1275                 return -1;
1276         }
1277         *ptr = (void *)self->ob_sval;
1278         return Py_Size(self);
1279 }
1280
1281 static Py_ssize_t
1282 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1283 {
1284         PyErr_SetString(PyExc_TypeError,
1285                         "Cannot use string as modifiable buffer");
1286         return -1;
1287 }
1288
1289 static Py_ssize_t
1290 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1291 {
1292         if ( lenp )
1293                 *lenp = Py_Size(self);
1294         return 1;
1295 }
1296
1297 static Py_ssize_t
1298 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1299 {
1300         if ( index != 0 ) {
1301                 PyErr_SetString(PyExc_SystemError,
1302                                 "accessing non-existent string segment");
1303                 return -1;
1304         }
1305         *ptr = self->ob_sval;
1306         return Py_Size(self);
1307 }
1308
1309 static PySequenceMethods string_as_sequence = {
1310         (lenfunc)string_length, /*sq_length*/
1311         (binaryfunc)string_concat, /*sq_concat*/
1312         (ssizeargfunc)string_repeat, /*sq_repeat*/
1313         (ssizeargfunc)string_item, /*sq_item*/
1314         (ssizessizeargfunc)string_slice, /*sq_slice*/
1315         0,              /*sq_ass_item*/
1316         0,              /*sq_ass_slice*/
1317         (objobjproc)string_contains /*sq_contains*/
1318 };
1319
1320 static PyMappingMethods string_as_mapping = {
1321         (lenfunc)string_length,
1322         (binaryfunc)string_subscript,
1323         0,
1324 };
1325
1326 static PyBufferProcs string_as_buffer = {
1327         (readbufferproc)string_buffer_getreadbuf,
1328         (writebufferproc)string_buffer_getwritebuf,
1329         (segcountproc)string_buffer_getsegcount,
1330         (charbufferproc)string_buffer_getcharbuf,
1331 };
1332
1333
1334 \f
/* Strip variants; the values double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
        "|O:lstrip",    /* LEFTSTRIP */
        "|O:rstrip",    /* RIGHTSTRIP */
        "|O:strip"      /* BOTHSTRIP */
};

/* The bare method name: the format string with its 3-character "|O:"
   prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
1343
1344
/* Non-zero when the `length` bytes of `pattern` occur in `target`
   starting at `offset`.  The first and last bytes are compared before
   the middle is handed to memcmp.
   Don't call if length < 2 (the memcmp length is length-2).
   Every argument is parenthesized so expression arguments expand
   correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1350
1351
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements.  The argument is
   parenthesized so that expression arguments expand correctly; it is
   evaluated more than once. */
#define PREALLOC_SIZE(maxsplit) \
        ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1364
/* The SPLIT_* macros below rely on names from the expanding function:
   `str` (scratch PyObject *), `list` (the result list), and an `onError`
   label; SPLIT_ADD and FIX_PREALLOC_SIZE also use a `count` counter. */

/* Append the substring data[left:right] to `list`, jumping to onError
   on failure.  Expands to several statements, not a single one. */
#define SPLIT_APPEND(data, left, right)                         \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (PyList_Append(list, str) != 0) {                    \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        Py_DECREF(str);

/* Store the substring data[left:right] as element number `count`:
   directly into a slot while count < MAX_PREALLOC, by appending once
   past that, then bump `count`. */
#define SPLIT_ADD(data, left, right) {                          \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (count >= MAX_PREALLOC) {                            \
                if (PyList_Append(list, str) != 0) {            \
                        Py_DECREF(str);                         \
                        goto onError;                           \
                }                                               \
                Py_DECREF(str);                                 \
        } else {                                                \
                PyList_SET_ITEM(list, count, str);              \
        }                                                       \
        count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
1396
/* Cursor helpers: advance `i` forward (SKIP_*, bounded by `len`) or
   backward (RSKIP_*, bounded by 0) over a run of whitespace or
   non-whitespace bytes of `s`.  `i` must be an lvalue. */
#define SKIP_SPACE(s, i, len)    { for (; i<len &&  isspace(Py_CHARMASK(s[i])); i++) {} }
#define SKIP_NONSPACE(s, i, len) { for (; i<len && !isspace(Py_CHARMASK(s[i])); i++) {} }
#define RSKIP_SPACE(s, i)        { for (; i>=0  &&  isspace(Py_CHARMASK(s[i])); i--) {} }
#define RSKIP_NONSPACE(s, i)     { for (; i>=0  && !isspace(Py_CHARMASK(s[i])); i--) {} }
1401
1402 Py_LOCAL_INLINE(PyObject *)
1403 split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1404 {
1405         Py_ssize_t i, j, count=0;
1406         PyObject *str;
1407         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1408
1409         if (list == NULL)
1410                 return NULL;
1411
1412         i = j = 0;
1413
1414         while (maxsplit-- > 0) {
1415                 SKIP_SPACE(s, i, len);
1416                 if (i==len) break;
1417                 j = i; i++;
1418                 SKIP_NONSPACE(s, i, len);
1419                 SPLIT_ADD(s, j, i);
1420         }
1421
1422         if (i < len) {
1423                 /* Only occurs when maxsplit was reached */
1424                 /* Skip any remaining whitespace and copy to end of string */
1425                 SKIP_SPACE(s, i, len);
1426                 if (i != len)
1427                         SPLIT_ADD(s, i, len);
1428         }
1429         FIX_PREALLOC_SIZE(list);
1430         return list;
1431   onError:
1432         Py_DECREF(list);
1433         return NULL;
1434 }
1435
1436 Py_LOCAL_INLINE(PyObject *)
1437 split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1438 {
1439         register Py_ssize_t i, j, count=0;
1440         PyObject *str;
1441         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1442
1443         if (list == NULL)
1444                 return NULL;
1445
1446         i = j = 0;
1447         while ((j < len) && (maxcount-- > 0)) {
1448                 for(; j<len; j++) {
1449                         /* I found that using memchr makes no difference */
1450                         if (s[j] == ch) {
1451                                 SPLIT_ADD(s, i, j);
1452                                 i = j = j + 1;
1453                                 break;
1454                         }
1455                 }
1456         }
1457         if (i <= len) {
1458                 SPLIT_ADD(s, i, len);
1459         }
1460         FIX_PREALLOC_SIZE(list);
1461         return list;
1462
1463   onError:
1464         Py_DECREF(list);
1465         return NULL;
1466 }
1467
1468 PyDoc_STRVAR(split__doc__,
1469 "S.split([sep [,maxsplit]]) -> list of strings\n\
1470 \n\
1471 Return a list of the words in the string S, using sep as the\n\
1472 delimiter string.  If maxsplit is given, at most maxsplit\n\
1473 splits are done. If sep is not specified or is None, any\n\
1474 whitespace string is a separator.");
1475
1476 static PyObject *
1477 string_split(PyStringObject *self, PyObject *args)
1478 {
1479         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1480         Py_ssize_t maxsplit = -1, count=0;
1481         const char *s = PyString_AS_STRING(self), *sub;
1482         PyObject *list, *str, *subobj = Py_None;
1483 #ifdef USE_FAST
1484         Py_ssize_t pos;
1485 #endif
1486
1487         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1488                 return NULL;
1489         if (maxsplit < 0)
1490                 maxsplit = PY_SSIZE_T_MAX;
1491         if (subobj == Py_None)
1492                 return split_whitespace(s, len, maxsplit);
1493         if (PyString_Check(subobj)) {
1494                 sub = PyString_AS_STRING(subobj);
1495                 n = PyString_GET_SIZE(subobj);
1496         }
1497 #ifdef Py_USING_UNICODE
1498         else if (PyUnicode_Check(subobj))
1499                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1500 #endif
1501         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1502                 return NULL;
1503
1504         if (n == 0) {
1505                 PyErr_SetString(PyExc_ValueError, "empty separator");
1506                 return NULL;
1507         }
1508         else if (n == 1)
1509                 return split_char(s, len, sub[0], maxsplit);
1510
1511         list = PyList_New(PREALLOC_SIZE(maxsplit));
1512         if (list == NULL)
1513                 return NULL;
1514
1515 #ifdef USE_FAST
1516         i = j = 0;
1517         while (maxsplit-- > 0) {
1518                 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1519                 if (pos < 0)
1520                         break;
1521                 j = i+pos;
1522                 SPLIT_ADD(s, i, j);
1523                 i = j + n;
1524         }
1525 #else
1526         i = j = 0;
1527         while ((j+n <= len) && (maxsplit-- > 0)) {
1528                 for (; j+n <= len; j++) {
1529                         if (Py_STRING_MATCH(s, j, sub, n)) {
1530                                 SPLIT_ADD(s, i, j);
1531                                 i = j = j + n;
1532                                 break;
1533                         }
1534                 }
1535         }
1536 #endif
1537         SPLIT_ADD(s, i, len);
1538         FIX_PREALLOC_SIZE(list);
1539         return list;
1540
1541  onError:
1542         Py_DECREF(list);
1543         return NULL;
1544 }
1545
1546 PyDoc_STRVAR(partition__doc__,
1547 "S.partition(sep) -> (head, sep, tail)\n\
1548 \n\
1549 Searches for the separator sep in S, and returns the part before it,\n\
1550 the separator itself, and the part after it.  If the separator is not\n\
1551 found, returns S and two empty strings.");
1552
1553 static PyObject *
1554 string_partition(PyStringObject *self, PyObject *sep_obj)
1555 {
1556         const char *sep;
1557         Py_ssize_t sep_len;
1558
1559         if (PyString_Check(sep_obj)) {
1560                 sep = PyString_AS_STRING(sep_obj);
1561                 sep_len = PyString_GET_SIZE(sep_obj);
1562         }
1563 #ifdef Py_USING_UNICODE
1564         else if (PyUnicode_Check(sep_obj))
1565                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1566 #endif
1567         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1568                 return NULL;
1569
1570         return stringlib_partition(
1571                 (PyObject*) self,
1572                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573                 sep_obj, sep, sep_len
1574                 );
1575 }
1576
1577 PyDoc_STRVAR(rpartition__doc__,
1578 "S.rpartition(sep) -> (tail, sep, head)\n\
1579 \n\
1580 Searches for the separator sep in S, starting at the end of S, and returns\n\
1581 the part before it, the separator itself, and the part after it.  If the\n\
1582 separator is not found, returns two empty strings and S.");
1583
1584 static PyObject *
1585 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1586 {
1587         const char *sep;
1588         Py_ssize_t sep_len;
1589
1590         if (PyString_Check(sep_obj)) {
1591                 sep = PyString_AS_STRING(sep_obj);
1592                 sep_len = PyString_GET_SIZE(sep_obj);
1593         }
1594 #ifdef Py_USING_UNICODE
1595         else if (PyUnicode_Check(sep_obj))
1596                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1597 #endif
1598         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1599                 return NULL;
1600
1601         return stringlib_rpartition(
1602                 (PyObject*) self,
1603                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1604                 sep_obj, sep, sep_len
1605                 );
1606 }
1607
1608 Py_LOCAL_INLINE(PyObject *)
1609 rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1610 {
1611         Py_ssize_t i, j, count=0;
1612         PyObject *str;
1613         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1614
1615         if (list == NULL)
1616                 return NULL;
1617
1618         i = j = len-1;
1619
1620         while (maxsplit-- > 0) {
1621                 RSKIP_SPACE(s, i);
1622                 if (i<0) break;
1623                 j = i; i--;
1624                 RSKIP_NONSPACE(s, i);
1625                 SPLIT_ADD(s, i + 1, j + 1);
1626         }
1627         if (i >= 0) {
1628                 /* Only occurs when maxsplit was reached */
1629                 /* Skip any remaining whitespace and copy to beginning of string */
1630                 RSKIP_SPACE(s, i);
1631                 if (i >= 0)
1632                         SPLIT_ADD(s, 0, i + 1);
1633
1634         }
1635         FIX_PREALLOC_SIZE(list);
1636         if (PyList_Reverse(list) < 0)
1637                 goto onError;
1638         return list;
1639   onError:
1640         Py_DECREF(list);
1641         return NULL;
1642 }
1643
1644 Py_LOCAL_INLINE(PyObject *)
1645 rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1646 {
1647         register Py_ssize_t i, j, count=0;
1648         PyObject *str;
1649         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1650
1651         if (list == NULL)
1652                 return NULL;
1653
1654         i = j = len - 1;
1655         while ((i >= 0) && (maxcount-- > 0)) {
1656                 for (; i >= 0; i--) {
1657                         if (s[i] == ch) {
1658                                 SPLIT_ADD(s, i + 1, j + 1);
1659                                 j = i = i - 1;
1660                                 break;
1661                         }
1662                 }
1663         }
1664         if (j >= -1) {
1665                 SPLIT_ADD(s, 0, j + 1);
1666         }
1667         FIX_PREALLOC_SIZE(list);
1668         if (PyList_Reverse(list) < 0)
1669                 goto onError;
1670         return list;
1671
1672  onError:
1673         Py_DECREF(list);
1674         return NULL;
1675 }
1676
1677 PyDoc_STRVAR(rsplit__doc__,
1678 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1679 \n\
1680 Return a list of the words in the string S, using sep as the\n\
1681 delimiter string, starting at the end of the string and working\n\
1682 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1683 done. If sep is not specified or is None, any whitespace string\n\
1684 is a separator.");
1685
1686 static PyObject *
1687 string_rsplit(PyStringObject *self, PyObject *args)
1688 {
1689         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1690         Py_ssize_t maxsplit = -1, count=0;
1691         const char *s = PyString_AS_STRING(self), *sub;
1692         PyObject *list, *str, *subobj = Py_None;
1693
1694         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1695                 return NULL;
1696         if (maxsplit < 0)
1697                 maxsplit = PY_SSIZE_T_MAX;
1698         if (subobj == Py_None)
1699                 return rsplit_whitespace(s, len, maxsplit);
1700         if (PyString_Check(subobj)) {
1701                 sub = PyString_AS_STRING(subobj);
1702                 n = PyString_GET_SIZE(subobj);
1703         }
1704 #ifdef Py_USING_UNICODE
1705         else if (PyUnicode_Check(subobj))
1706                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1707 #endif
1708         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1709                 return NULL;
1710
1711         if (n == 0) {
1712                 PyErr_SetString(PyExc_ValueError, "empty separator");
1713                 return NULL;
1714         }
1715         else if (n == 1)
1716                 return rsplit_char(s, len, sub[0], maxsplit);
1717
1718         list = PyList_New(PREALLOC_SIZE(maxsplit));
1719         if (list == NULL)
1720                 return NULL;
1721
1722         j = len;
1723         i = j - n;
1724
1725         while ( (i >= 0) && (maxsplit-- > 0) ) {
1726                 for (; i>=0; i--) {
1727                         if (Py_STRING_MATCH(s, i, sub, n)) {
1728                                 SPLIT_ADD(s, i + n, j);
1729                                 j = i;
1730                                 i -= n;
1731                                 break;
1732                         }
1733                 }
1734         }
1735         SPLIT_ADD(s, 0, j);
1736         FIX_PREALLOC_SIZE(list);
1737         if (PyList_Reverse(list) < 0)
1738                 goto onError;
1739         return list;
1740
1741 onError:
1742         Py_DECREF(list);
1743         return NULL;
1744 }
1745
1746
1747 PyDoc_STRVAR(join__doc__,
1748 "S.join(sequence) -> string\n\
1749 \n\
1750 Return a string which is the concatenation of the strings in the\n\
1751 sequence.  The separator between elements is S.");
1752
1753 static PyObject *
1754 string_join(PyStringObject *self, PyObject *orig)
1755 {
1756         char *sep = PyString_AS_STRING(self);
1757         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1758         PyObject *res = NULL;
1759         char *p;
1760         Py_ssize_t seqlen = 0;
1761         size_t sz = 0;
1762         Py_ssize_t i;
1763         PyObject *seq, *item;
1764
1765         seq = PySequence_Fast(orig, "");
1766         if (seq == NULL) {
1767                 return NULL;
1768         }
1769
1770         seqlen = PySequence_Size(seq);
1771         if (seqlen == 0) {
1772                 Py_DECREF(seq);
1773                 return PyString_FromString("");
1774         }
1775         if (seqlen == 1) {
1776                 item = PySequence_Fast_GET_ITEM(seq, 0);
1777                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1778                         Py_INCREF(item);
1779                         Py_DECREF(seq);
1780                         return item;
1781                 }
1782         }
1783
1784         /* There are at least two things to join, or else we have a subclass
1785          * of the builtin types in the sequence.
1786          * Do a pre-pass to figure out the total amount of space we'll
1787          * need (sz), see whether any argument is absurd, and defer to
1788          * the Unicode join if appropriate.
1789          */
1790         for (i = 0; i < seqlen; i++) {
1791                 const size_t old_sz = sz;
1792                 item = PySequence_Fast_GET_ITEM(seq, i);
1793                 if (!PyString_Check(item)){
1794 #ifdef Py_USING_UNICODE
1795                         if (PyUnicode_Check(item)) {
1796                                 /* Defer to Unicode join.
1797                                  * CAUTION:  There's no gurantee that the
1798                                  * original sequence can be iterated over
1799                                  * again, so we must pass seq here.
1800                                  */
1801                                 PyObject *result;
1802                                 result = PyUnicode_Join((PyObject *)self, seq);
1803                                 Py_DECREF(seq);
1804                                 return result;
1805                         }
1806 #endif
1807                         PyErr_Format(PyExc_TypeError,
1808                                      "sequence item %zd: expected string,"
1809                                      " %.80s found",
1810                                      i, Py_Type(item)->tp_name);
1811                         Py_DECREF(seq);
1812                         return NULL;
1813                 }
1814                 sz += PyString_GET_SIZE(item);
1815                 if (i != 0)
1816                         sz += seplen;
1817                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1818                         PyErr_SetString(PyExc_OverflowError,
1819                                 "join() result is too long for a Python string");
1820                         Py_DECREF(seq);
1821                         return NULL;
1822                 }
1823         }
1824
1825         /* Allocate result space. */
1826         res = PyString_FromStringAndSize((char*)NULL, sz);
1827         if (res == NULL) {
1828                 Py_DECREF(seq);
1829                 return NULL;
1830         }
1831
1832         /* Catenate everything. */
1833         p = PyString_AS_STRING(res);
1834         for (i = 0; i < seqlen; ++i) {
1835                 size_t n;
1836                 item = PySequence_Fast_GET_ITEM(seq, i);
1837                 n = PyString_GET_SIZE(item);
1838                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1839                 p += n;
1840                 if (i < seqlen - 1) {
1841                         Py_MEMCPY(p, sep, seplen);
1842                         p += seplen;
1843                 }
1844         }
1845
1846         Py_DECREF(seq);
1847         return res;
1848 }
1849
1850 PyObject *
1851 _PyString_Join(PyObject *sep, PyObject *x)
1852 {
1853         assert(sep != NULL && PyString_Check(sep));
1854         assert(x != NULL);
1855         return string_join((PyStringObject *)sep, x);
1856 }
1857
1858 Py_LOCAL_INLINE(void)
1859 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1860 {
1861         if (*end > len)
1862                 *end = len;
1863         else if (*end < 0)
1864                 *end += len;
1865         if (*end < 0)
1866                 *end = 0;
1867         if (*start < 0)
1868                 *start += len;
1869         if (*start < 0)
1870                 *start = 0;
1871 }
1872
1873 Py_LOCAL_INLINE(Py_ssize_t)
1874 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1875 {
1876         PyObject *subobj;
1877         const char *sub;
1878         Py_ssize_t sub_len;
1879         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1880
1881         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1882                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1883                 return -2;
1884         if (PyString_Check(subobj)) {
1885                 sub = PyString_AS_STRING(subobj);
1886                 sub_len = PyString_GET_SIZE(subobj);
1887         }
1888 #ifdef Py_USING_UNICODE
1889         else if (PyUnicode_Check(subobj))
1890                 return PyUnicode_Find(
1891                         (PyObject *)self, subobj, start, end, dir);
1892 #endif
1893         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1894                 /* XXX - the "expected a character buffer object" is pretty
1895                    confusing for a non-expert.  remap to something else ? */
1896                 return -2;
1897
1898         if (dir > 0)
1899                 return stringlib_find_slice(
1900                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1901                         sub, sub_len, start, end);
1902         else
1903                 return stringlib_rfind_slice(
1904                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1905                         sub, sub_len, start, end);
1906 }
1907
1908
1909 PyDoc_STRVAR(find__doc__,
1910 "S.find(sub [,start [,end]]) -> int\n\
1911 \n\
1912 Return the lowest index in S where substring sub is found,\n\
1913 such that sub is contained within s[start:end].  Optional\n\
1914 arguments start and end are interpreted as in slice notation.\n\
1915 \n\
1916 Return -1 on failure.");
1917
1918 static PyObject *
1919 string_find(PyStringObject *self, PyObject *args)
1920 {
1921         Py_ssize_t result = string_find_internal(self, args, +1);
1922         if (result == -2)
1923                 return NULL;
1924         return PyInt_FromSsize_t(result);
1925 }
1926
1927
1928 PyDoc_STRVAR(index__doc__,
1929 "S.index(sub [,start [,end]]) -> int\n\
1930 \n\
1931 Like S.find() but raise ValueError when the substring is not found.");
1932
1933 static PyObject *
1934 string_index(PyStringObject *self, PyObject *args)
1935 {
1936         Py_ssize_t result = string_find_internal(self, args, +1);
1937         if (result == -2)
1938                 return NULL;
1939         if (result == -1) {
1940                 PyErr_SetString(PyExc_ValueError,
1941                                 "substring not found");
1942                 return NULL;
1943         }
1944         return PyInt_FromSsize_t(result);
1945 }
1946
1947
1948 PyDoc_STRVAR(rfind__doc__,
1949 "S.rfind(sub [,start [,end]]) -> int\n\
1950 \n\
1951 Return the highest index in S where substring sub is found,\n\
1952 such that sub is contained within s[start:end].  Optional\n\
1953 arguments start and end are interpreted as in slice notation.\n\
1954 \n\
1955 Return -1 on failure.");
1956
1957 static PyObject *
1958 string_rfind(PyStringObject *self, PyObject *args)
1959 {
1960         Py_ssize_t result = string_find_internal(self, args, -1);
1961         if (result == -2)
1962                 return NULL;
1963         return PyInt_FromSsize_t(result);
1964 }
1965
1966
1967 PyDoc_STRVAR(rindex__doc__,
1968 "S.rindex(sub [,start [,end]]) -> int\n\
1969 \n\
1970 Like S.rfind() but raise ValueError when the substring is not found.");
1971
1972 static PyObject *
1973 string_rindex(PyStringObject *self, PyObject *args)
1974 {
1975         Py_ssize_t result = string_find_internal(self, args, -1);
1976         if (result == -2)
1977                 return NULL;
1978         if (result == -1) {
1979                 PyErr_SetString(PyExc_ValueError,
1980                                 "substring not found");
1981                 return NULL;
1982         }
1983         return PyInt_FromSsize_t(result);
1984 }
1985
1986
1987 Py_LOCAL_INLINE(PyObject *)
1988 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1989 {
1990         char *s = PyString_AS_STRING(self);
1991         Py_ssize_t len = PyString_GET_SIZE(self);
1992         char *sep = PyString_AS_STRING(sepobj);
1993         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1994         Py_ssize_t i, j;
1995
1996         i = 0;
1997         if (striptype != RIGHTSTRIP) {
1998                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1999                         i++;
2000                 }
2001         }
2002
2003         j = len;
2004         if (striptype != LEFTSTRIP) {
2005                 do {
2006                         j--;
2007                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2008                 j++;
2009         }
2010
2011         if (i == 0 && j == len && PyString_CheckExact(self)) {
2012                 Py_INCREF(self);
2013                 return (PyObject*)self;
2014         }
2015         else
2016                 return PyString_FromStringAndSize(s+i, j-i);
2017 }
2018
2019
2020 Py_LOCAL_INLINE(PyObject *)
2021 do_strip(PyStringObject *self, int striptype)
2022 {
2023         char *s = PyString_AS_STRING(self);
2024         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2025
2026         i = 0;
2027         if (striptype != RIGHTSTRIP) {
2028                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2029                         i++;
2030                 }
2031         }
2032
2033         j = len;
2034         if (striptype != LEFTSTRIP) {
2035                 do {
2036                         j--;
2037                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2038                 j++;
2039         }
2040
2041         if (i == 0 && j == len && PyString_CheckExact(self)) {
2042                 Py_INCREF(self);
2043                 return (PyObject*)self;
2044         }
2045         else
2046                 return PyString_FromStringAndSize(s+i, j-i);
2047 }
2048
2049
2050 Py_LOCAL_INLINE(PyObject *)
2051 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2052 {
2053         PyObject *sep = NULL;
2054
2055         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2056                 return NULL;
2057
2058         if (sep != NULL && sep != Py_None) {
2059                 if (PyString_Check(sep))
2060                         return do_xstrip(self, striptype, sep);
2061 #ifdef Py_USING_UNICODE
2062                 else if (PyUnicode_Check(sep)) {
2063                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2064                         PyObject *res;
2065                         if (uniself==NULL)
2066                                 return NULL;
2067                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2068                                 striptype, sep);
2069                         Py_DECREF(uniself);
2070                         return res;
2071                 }
2072 #endif
2073                 PyErr_Format(PyExc_TypeError,
2074 #ifdef Py_USING_UNICODE
2075                              "%s arg must be None, str or unicode",
2076 #else
2077                              "%s arg must be None or str",
2078 #endif
2079                              STRIPNAME(striptype));
2080                 return NULL;
2081         }
2082
2083         return do_strip(self, striptype);
2084 }
2085
2086
2087 PyDoc_STRVAR(strip__doc__,
2088 "S.strip([chars]) -> string or unicode\n\
2089 \n\
2090 Return a copy of the string S with leading and trailing\n\
2091 whitespace removed.\n\
2092 If chars is given and not None, remove characters in chars instead.\n\
2093 If chars is unicode, S will be converted to unicode before stripping");
2094
2095 static PyObject *
2096 string_strip(PyStringObject *self, PyObject *args)
2097 {
2098         if (PyTuple_GET_SIZE(args) == 0)
2099                 return do_strip(self, BOTHSTRIP); /* Common case */
2100         else
2101                 return do_argstrip(self, BOTHSTRIP, args);
2102 }
2103
2104
2105 PyDoc_STRVAR(lstrip__doc__,
2106 "S.lstrip([chars]) -> string or unicode\n\
2107 \n\
2108 Return a copy of the string S with leading whitespace removed.\n\
2109 If chars is given and not None, remove characters in chars instead.\n\
2110 If chars is unicode, S will be converted to unicode before stripping");
2111
2112 static PyObject *
2113 string_lstrip(PyStringObject *self, PyObject *args)
2114 {
2115         if (PyTuple_GET_SIZE(args) == 0)
2116                 return do_strip(self, LEFTSTRIP); /* Common case */
2117         else
2118                 return do_argstrip(self, LEFTSTRIP, args);
2119 }
2120
2121
2122 PyDoc_STRVAR(rstrip__doc__,
2123 "S.rstrip([chars]) -> string or unicode\n\
2124 \n\
2125 Return a copy of the string S with trailing whitespace removed.\n\
2126 If chars is given and not None, remove characters in chars instead.\n\
2127 If chars is unicode, S will be converted to unicode before stripping");
2128
2129 static PyObject *
2130 string_rstrip(PyStringObject *self, PyObject *args)
2131 {
2132         if (PyTuple_GET_SIZE(args) == 0)
2133                 return do_strip(self, RIGHTSTRIP); /* Common case */
2134         else
2135                 return do_argstrip(self, RIGHTSTRIP, args);
2136 }
2137
2138
2139 PyDoc_STRVAR(lower__doc__,
2140 "S.lower() -> string\n\
2141 \n\
2142 Return a copy of the string S converted to lowercase.");
2143
2144 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2145 #ifndef _tolower
2146 #define _tolower tolower
2147 #endif
2148
2149 static PyObject *
2150 string_lower(PyStringObject *self)
2151 {
2152         char *s;
2153         Py_ssize_t i, n = PyString_GET_SIZE(self);
2154         PyObject *newobj;
2155
2156         newobj = PyString_FromStringAndSize(NULL, n);
2157         if (!newobj)
2158                 return NULL;
2159
2160         s = PyString_AS_STRING(newobj);
2161
2162         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2163
2164         for (i = 0; i < n; i++) {
2165                 int c = Py_CHARMASK(s[i]);
2166                 if (isupper(c))
2167                         s[i] = _tolower(c);
2168         }
2169
2170         return newobj;
2171 }
2172
2173 PyDoc_STRVAR(upper__doc__,
2174 "S.upper() -> string\n\
2175 \n\
2176 Return a copy of the string S converted to uppercase.");
2177
2178 #ifndef _toupper
2179 #define _toupper toupper
2180 #endif
2181
2182 static PyObject *
2183 string_upper(PyStringObject *self)
2184 {
2185         char *s;
2186         Py_ssize_t i, n = PyString_GET_SIZE(self);
2187         PyObject *newobj;
2188
2189         newobj = PyString_FromStringAndSize(NULL, n);
2190         if (!newobj)
2191                 return NULL;
2192
2193         s = PyString_AS_STRING(newobj);
2194
2195         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2196
2197         for (i = 0; i < n; i++) {
2198                 int c = Py_CHARMASK(s[i]);
2199                 if (islower(c))
2200                         s[i] = _toupper(c);
2201         }
2202
2203         return newobj;
2204 }
2205
2206 PyDoc_STRVAR(title__doc__,
2207 "S.title() -> string\n\
2208 \n\
2209 Return a titlecased version of S, i.e. words start with uppercase\n\
2210 characters, all remaining cased characters have lowercase.");
2211
2212 static PyObject*
2213 string_title(PyStringObject *self)
2214 {
2215         char *s = PyString_AS_STRING(self), *s_new;
2216         Py_ssize_t i, n = PyString_GET_SIZE(self);
2217         int previous_is_cased = 0;
2218         PyObject *newobj;
2219
2220         newobj = PyString_FromStringAndSize(NULL, n);
2221         if (newobj == NULL)
2222                 return NULL;
2223         s_new = PyString_AsString(newobj);
2224         for (i = 0; i < n; i++) {
2225                 int c = Py_CHARMASK(*s++);
2226                 if (islower(c)) {
2227                         if (!previous_is_cased)
2228                             c = toupper(c);
2229                         previous_is_cased = 1;
2230                 } else if (isupper(c)) {
2231                         if (previous_is_cased)
2232                             c = tolower(c);
2233                         previous_is_cased = 1;
2234                 } else
2235                         previous_is_cased = 0;
2236                 *s_new++ = c;
2237         }
2238         return newobj;
2239 }
2240
2241 PyDoc_STRVAR(capitalize__doc__,
2242 "S.capitalize() -> string\n\
2243 \n\
2244 Return a copy of the string S with only its first character\n\
2245 capitalized.");
2246
2247 static PyObject *
2248 string_capitalize(PyStringObject *self)
2249 {
2250         char *s = PyString_AS_STRING(self), *s_new;
2251         Py_ssize_t i, n = PyString_GET_SIZE(self);
2252         PyObject *newobj;
2253
2254         newobj = PyString_FromStringAndSize(NULL, n);
2255         if (newobj == NULL)
2256                 return NULL;
2257         s_new = PyString_AsString(newobj);
2258         if (0 < n) {
2259                 int c = Py_CHARMASK(*s++);
2260                 if (islower(c))
2261                         *s_new = toupper(c);
2262                 else
2263                         *s_new = c;
2264                 s_new++;
2265         }
2266         for (i = 1; i < n; i++) {
2267                 int c = Py_CHARMASK(*s++);
2268                 if (isupper(c))
2269                         *s_new = tolower(c);
2270                 else
2271                         *s_new = c;
2272                 s_new++;
2273         }
2274         return newobj;
2275 }
2276
2277
2278 PyDoc_STRVAR(count__doc__,
2279 "S.count(sub[, start[, end]]) -> int\n\
2280 \n\
2281 Return the number of non-overlapping occurrences of substring sub in\n\
2282 string S[start:end].  Optional arguments start and end are interpreted\n\
2283 as in slice notation.");
2284
2285 static PyObject *
2286 string_count(PyStringObject *self, PyObject *args)
2287 {
2288         PyObject *sub_obj;
2289         const char *str = PyString_AS_STRING(self), *sub;
2290         Py_ssize_t sub_len;
2291         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2292
2293         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2294                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2295                 return NULL;
2296
2297         if (PyString_Check(sub_obj)) {
2298                 sub = PyString_AS_STRING(sub_obj);
2299                 sub_len = PyString_GET_SIZE(sub_obj);
2300         }
2301 #ifdef Py_USING_UNICODE
2302         else if (PyUnicode_Check(sub_obj)) {
2303                 Py_ssize_t count;
2304                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2305                 if (count == -1)
2306                         return NULL;
2307                 else
2308                         return PyInt_FromSsize_t(count);
2309         }
2310 #endif
2311         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2312                 return NULL;
2313
2314         string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2315
2316         return PyInt_FromSsize_t(
2317                 stringlib_count(str + start, end - start, sub, sub_len)
2318                 );
2319 }
2320
2321 PyDoc_STRVAR(swapcase__doc__,
2322 "S.swapcase() -> string\n\
2323 \n\
2324 Return a copy of the string S with uppercase characters\n\
2325 converted to lowercase and vice versa.");
2326
2327 static PyObject *
2328 string_swapcase(PyStringObject *self)
2329 {
2330         char *s = PyString_AS_STRING(self), *s_new;
2331         Py_ssize_t i, n = PyString_GET_SIZE(self);
2332         PyObject *newobj;
2333
2334         newobj = PyString_FromStringAndSize(NULL, n);
2335         if (newobj == NULL)
2336                 return NULL;
2337         s_new = PyString_AsString(newobj);
2338         for (i = 0; i < n; i++) {
2339                 int c = Py_CHARMASK(*s++);
2340                 if (islower(c)) {
2341                         *s_new = toupper(c);
2342                 }
2343                 else if (isupper(c)) {
2344                         *s_new = tolower(c);
2345                 }
2346                 else
2347                         *s_new = c;
2348                 s_new++;
2349         }
2350         return newobj;
2351 }
2352
2353
2354 PyDoc_STRVAR(translate__doc__,
2355 "S.translate(table [,deletechars]) -> string\n\
2356 \n\
2357 Return a copy of the string S, where all characters occurring\n\
2358 in the optional argument deletechars are removed, and the\n\
2359 remaining characters have been mapped through the given\n\
2360 translation table, which must be a string of length 256.");
2361
2362 static PyObject *
2363 string_translate(PyStringObject *self, PyObject *args)
2364 {
2365         register char *input, *output;
2366         const char *table;
2367         register Py_ssize_t i, c, changed = 0;
2368         PyObject *input_obj = (PyObject*)self;
2369         const char *output_start, *del_table=NULL;
2370         Py_ssize_t inlen, tablen, dellen = 0;
2371         PyObject *result;
2372         int trans_table[256];
2373         PyObject *tableobj, *delobj = NULL;
2374
2375         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2376                               &tableobj, &delobj))
2377                 return NULL;
2378
2379         if (PyString_Check(tableobj)) {
2380                 table = PyString_AS_STRING(tableobj);
2381                 tablen = PyString_GET_SIZE(tableobj);
2382         }
2383         else if (tableobj == Py_None) {
2384                 table = NULL;
2385                 tablen = 256;
2386         }
2387 #ifdef Py_USING_UNICODE
2388         else if (PyUnicode_Check(tableobj)) {
2389                 /* Unicode .translate() does not support the deletechars
2390                    parameter; instead a mapping to None will cause characters
2391                    to be deleted. */
2392                 if (delobj != NULL) {
2393                         PyErr_SetString(PyExc_TypeError,
2394                         "deletions are implemented differently for unicode");
2395                         return NULL;
2396                 }
2397                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2398         }
2399 #endif
2400         else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2401                 return NULL;
2402
2403         if (tablen != 256) {
2404                 PyErr_SetString(PyExc_ValueError,
2405                   "translation table must be 256 characters long");
2406                 return NULL;
2407         }
2408
2409         if (delobj != NULL) {
2410                 if (PyString_Check(delobj)) {
2411                         del_table = PyString_AS_STRING(delobj);
2412                         dellen = PyString_GET_SIZE(delobj);
2413                 }
2414 #ifdef Py_USING_UNICODE
2415                 else if (PyUnicode_Check(delobj)) {
2416                         PyErr_SetString(PyExc_TypeError,
2417                         "deletions are implemented differently for unicode");
2418                         return NULL;
2419                 }
2420 #endif
2421                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2422                         return NULL;
2423         }
2424         else {
2425                 del_table = NULL;
2426                 dellen = 0;
2427         }
2428
2429         inlen = PyString_GET_SIZE(input_obj);
2430         result = PyString_FromStringAndSize((char *)NULL, inlen);
2431         if (result == NULL)
2432                 return NULL;
2433         output_start = output = PyString_AsString(result);
2434         input = PyString_AS_STRING(input_obj);
2435
2436         if (dellen == 0 && table != NULL) {
2437                 /* If no deletions are required, use faster code */
2438                 for (i = inlen; --i >= 0; ) {
2439                         c = Py_CHARMASK(*input++);
2440                         if (Py_CHARMASK((*output++ = table[c])) != c)
2441                                 changed = 1;
2442                 }
2443                 if (changed || !PyString_CheckExact(input_obj))
2444                         return result;
2445                 Py_DECREF(result);
2446                 Py_INCREF(input_obj);
2447                 return input_obj;
2448         }
2449
2450         if (table == NULL) {
2451                 for (i = 0; i < 256; i++)
2452                         trans_table[i] = Py_CHARMASK(i);
2453         } else {
2454                 for (i = 0; i < 256; i++)
2455                         trans_table[i] = Py_CHARMASK(table[i]);
2456         }
2457
2458         for (i = 0; i < dellen; i++)
2459                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2460
2461         for (i = inlen; --i >= 0; ) {
2462                 c = Py_CHARMASK(*input++);
2463                 if (trans_table[c] != -1)
2464                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2465                                 continue;
2466                 changed = 1;
2467         }
2468         if (!changed && PyString_CheckExact(input_obj)) {
2469                 Py_DECREF(result);
2470                 Py_INCREF(input_obj);
2471                 return input_obj;
2472         }
2473         /* Fix the size of the resulting string */
2474         if (inlen > 0)
2475                 _PyString_Resize(&result, output - output_start);
2476         return result;
2477 }
2478
2479
2480 #define FORWARD 1
2481 #define REVERSE -1
2482
2483 /* find and count characters and substrings */
2484
2485 #define findchar(target, target_len, c)                         \
2486   ((char *)memchr((const void *)(target), c, target_len))
2487
2488 /* String ops must return a string.  */
2489 /* If the object is subclass of string, create a copy */
2490 Py_LOCAL(PyStringObject *)
2491 return_self(PyStringObject *self)
2492 {
2493         if (PyString_CheckExact(self)) {
2494                 Py_INCREF(self);
2495                 return self;
2496         }
2497         return (PyStringObject *)PyString_FromStringAndSize(
2498                 PyString_AS_STRING(self),
2499                 PyString_GET_SIZE(self));
2500 }
2501
2502 Py_LOCAL_INLINE(Py_ssize_t)
2503 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2504 {
2505         Py_ssize_t count=0;
2506         const char *start=target;
2507         const char *end=target+target_len;
2508
2509         while ( (start=findchar(start, end-start, c)) != NULL ) {
2510                 count++;
2511                 if (count >= maxcount)
2512                         break;
2513                 start += 1;
2514         }
2515         return count;
2516 }
2517
2518 Py_LOCAL(Py_ssize_t)
2519 findstring(const char *target, Py_ssize_t target_len,
2520            const char *pattern, Py_ssize_t pattern_len,
2521            Py_ssize_t start,
2522            Py_ssize_t end,
2523            int direction)
2524 {
2525         if (start < 0) {
2526                 start += target_len;
2527                 if (start < 0)
2528                         start = 0;
2529         }
2530         if (end > target_len) {
2531                 end = target_len;
2532         } else if (end < 0) {
2533                 end += target_len;
2534                 if (end < 0)
2535                         end = 0;
2536         }
2537
2538         /* zero-length substrings always match at the first attempt */
2539         if (pattern_len == 0)
2540                 return (direction > 0) ? start : end;
2541
2542         end -= pattern_len;
2543
2544         if (direction < 0) {
2545                 for (; end >= start; end--)
2546                         if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2547                                 return end;
2548         } else {
2549                 for (; start <= end; start++)
2550                         if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2551                                 return start;
2552         }
2553         return -1;
2554 }
2555
2556 Py_LOCAL_INLINE(Py_ssize_t)
2557 countstring(const char *target, Py_ssize_t target_len,
2558             const char *pattern, Py_ssize_t pattern_len,
2559             Py_ssize_t start,
2560             Py_ssize_t end,
2561             int direction, Py_ssize_t maxcount)
2562 {
2563         Py_ssize_t count=0;
2564
2565         if (start < 0) {
2566                 start += target_len;
2567                 if (start < 0)
2568                         start = 0;
2569         }
2570         if (end > target_len) {
2571                 end = target_len;
2572         } else if (end < 0) {
2573                 end += target_len;
2574                 if (end < 0)
2575                         end = 0;
2576         }
2577
2578         /* zero-length substrings match everywhere */
2579         if (pattern_len == 0 || maxcount == 0) {
2580                 if (target_len+1 < maxcount)
2581                         return target_len+1;
2582                 return maxcount;
2583         }
2584
2585         end -= pattern_len;
2586         if (direction < 0) {
2587                 for (; (end >= start); end--)
2588                         if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2589                                 count++;
2590                                 if (--maxcount <= 0) break;
2591                                 end -= pattern_len-1;
2592                         }
2593         } else {
2594                 for (; (start <= end); start++)
2595                         if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2596                                 count++;
2597                                 if (--maxcount <= 0)
2598                                         break;
2599                                 start += pattern_len-1;
2600                         }
2601         }
2602         return count;
2603 }
2604
2605
2606 /* Algorithms for different cases of string replacement */
2607
2608 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2609 Py_LOCAL(PyStringObject *)
2610 replace_interleave(PyStringObject *self,
2611                    const char *to_s, Py_ssize_t to_len,
2612                    Py_ssize_t maxcount)
2613 {
2614         char *self_s, *result_s;
2615         Py_ssize_t self_len, result_len;
2616         Py_ssize_t count, i, product;
2617         PyStringObject *result;
2618
2619         self_len = PyString_GET_SIZE(self);
2620
2621         /* 1 at the end plus 1 after every character */
2622         count = self_len+1;
2623         if (maxcount < count)
2624                 count = maxcount;
2625
2626         /* Check for overflow */
2627         /*   result_len = count * to_len + self_len; */
2628         product = count * to_len;
2629         if (product / to_len != count) {
2630                 PyErr_SetString(PyExc_OverflowError,
2631                                 "replace string is too long");
2632                 return NULL;
2633         }
2634         result_len = product + self_len;
2635         if (result_len < 0) {
2636                 PyErr_SetString(PyExc_OverflowError,
2637                                 "replace string is too long");
2638                 return NULL;
2639         }
2640
2641         if (! (result = (PyStringObject *)
2642                          PyString_FromStringAndSize(NULL, result_len)) )
2643                 return NULL;
2644
2645         self_s = PyString_AS_STRING(self);
2646         result_s = PyString_AS_STRING(result);
2647
2648         /* TODO: special case single character, which doesn't need memcpy */
2649
2650         /* Lay the first one down (guaranteed this will occur) */
2651         Py_MEMCPY(result_s, to_s, to_len);
2652         result_s += to_len;
2653         count -= 1;
2654
2655         for (i=0; i<count; i++) {
2656                 *result_s++ = *self_s++;
2657                 Py_MEMCPY(result_s, to_s, to_len);
2658                 result_s += to_len;
2659         }
2660
2661         /* Copy the rest of the original string */
2662         Py_MEMCPY(result_s, self_s, self_len-i);
2663
2664         return result;
2665 }
2666
2667 /* Special case for deleting a single character */
2668 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2669 Py_LOCAL(PyStringObject *)
2670 replace_delete_single_character(PyStringObject *self,
2671                                 char from_c, Py_ssize_t maxcount)
2672 {
2673         char *self_s, *result_s;
2674         char *start, *next, *end;
2675         Py_ssize_t self_len, result_len;
2676         Py_ssize_t count;
2677         PyStringObject *result;
2678
2679         self_len = PyString_GET_SIZE(self);
2680         self_s = PyString_AS_STRING(self);
2681
2682         count = countchar(self_s, self_len, from_c, maxcount);
2683         if (count == 0) {
2684                 return return_self(self);
2685         }
2686
2687         result_len = self_len - count;  /* from_len == 1 */
2688         assert(result_len>=0);
2689
2690         if ( (result = (PyStringObject *)
2691                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2692                 return NULL;
2693         result_s = PyString_AS_STRING(result);
2694
2695         start = self_s;
2696         end = self_s + self_len;
2697         while (count-- > 0) {
2698                 next = findchar(start, end-start, from_c);
2699                 if (next == NULL)
2700                         break;
2701                 Py_MEMCPY(result_s, start, next-start);
2702                 result_s += (next-start);
2703                 start = next+1;
2704         }
2705         Py_MEMCPY(result_s, start, end-start);
2706
2707         return result;
2708 }
2709
2710 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2711
2712 Py_LOCAL(PyStringObject *)
2713 replace_delete_substring(PyStringObject *self,
2714                          const char *from_s, Py_ssize_t from_len,
2715                          Py_ssize_t maxcount) {
2716         char *self_s, *result_s;
2717         char *start, *next, *end;
2718         Py_ssize_t self_len, result_len;
2719         Py_ssize_t count, offset;
2720         PyStringObject *result;
2721
2722         self_len = PyString_GET_SIZE(self);
2723         self_s = PyString_AS_STRING(self);
2724
2725         count = countstring(self_s, self_len,
2726                             from_s, from_len,
2727                             0, self_len, 1,
2728                             maxcount);
2729
2730         if (count == 0) {
2731                 /* no matches */
2732                 return return_self(self);
2733         }
2734
2735         result_len = self_len - (count * from_len);
2736         assert (result_len>=0);
2737
2738         if ( (result = (PyStringObject *)
2739               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2740                 return NULL;
2741
2742         result_s = PyString_AS_STRING(result);
2743
2744         start = self_s;
2745         end = self_s + self_len;
2746         while (count-- > 0) {
2747                 offset = findstring(start, end-start,
2748                                     from_s, from_len,
2749                                     0, end-start, FORWARD);
2750                 if (offset == -1)
2751                         break;
2752                 next = start + offset;
2753
2754                 Py_MEMCPY(result_s, start, next-start);
2755
2756                 result_s += (next-start);
2757                 start = next+from_len;
2758         }
2759         Py_MEMCPY(result_s, start, end-start);
2760         return result;
2761 }
2762
2763 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2764 Py_LOCAL(PyStringObject *)
2765 replace_single_character_in_place(PyStringObject *self,
2766                                   char from_c, char to_c,
2767                                   Py_ssize_t maxcount)
2768 {
2769         char *self_s, *result_s, *start, *end, *next;
2770         Py_ssize_t self_len;
2771         PyStringObject *result;
2772
2773         /* The result string will be the same size */
2774         self_s = PyString_AS_STRING(self);
2775         self_len = PyString_GET_SIZE(self);
2776
2777         next = findchar(self_s, self_len, from_c);
2778
2779         if (next == NULL) {
2780                 /* No matches; return the original string */
2781                 return return_self(self);
2782         }
2783
2784         /* Need to make a new string */
2785         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2786         if (result == NULL)
2787                 return NULL;
2788         result_s = PyString_AS_STRING(result);
2789         Py_MEMCPY(result_s, self_s, self_len);
2790
2791         /* change everything in-place, starting with this one */
2792         start =  result_s + (next-self_s);
2793         *start = to_c;
2794         start++;
2795         end = result_s + self_len;
2796
2797         while (--maxcount > 0) {
2798                 next = findchar(start, end-start, from_c);
2799                 if (next == NULL)
2800                         break;
2801                 *next = to_c;
2802                 start = next+1;
2803         }
2804
2805         return result;
2806 }
2807
2808 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2809 Py_LOCAL(PyStringObject *)
2810 replace_substring_in_place(PyStringObject *self,
2811                            const char *from_s, Py_ssize_t from_len,
2812                            const char *to_s, Py_ssize_t to_len,
2813                            Py_ssize_t maxcount)
2814 {
2815         char *result_s, *start, *end;
2816         char *self_s;
2817         Py_ssize_t self_len, offset;
2818         PyStringObject *result;
2819
2820         /* The result string will be the same size */
2821
2822         self_s = PyString_AS_STRING(self);
2823         self_len = PyString_GET_SIZE(self);
2824
2825         offset = findstring(self_s, self_len,
2826                             from_s, from_len,
2827                             0, self_len, FORWARD);
2828         if (offset == -1) {
2829                 /* No matches; return the original string */
2830                 return return_self(self);
2831         }
2832
2833         /* Need to make a new string */
2834         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2835         if (result == NULL)
2836                 return NULL;
2837         result_s = PyString_AS_STRING(result);
2838         Py_MEMCPY(result_s, self_s, self_len);
2839
2840         /* change everything in-place, starting with this one */
2841         start =  result_s + offset;
2842         Py_MEMCPY(start, to_s, from_len);
2843         start += from_len;
2844         end = result_s + self_len;
2845
2846         while ( --maxcount > 0) {
2847                 offset = findstring(start, end-start,
2848                                     from_s, from_len,
2849                                     0, end-start, FORWARD);
2850                 if (offset==-1)
2851                         break;
2852                 Py_MEMCPY(start+offset, to_s, from_len);
2853                 start += offset+from_len;
2854         }
2855
2856         return result;
2857 }
2858
2859 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2860 Py_LOCAL(PyStringObject *)
2861 replace_single_character(PyStringObject *self,
2862                          char from_c,
2863                          const char *to_s, Py_ssize_t to_len,
2864                          Py_ssize_t maxcount)
2865 {
2866         char *self_s, *result_s;
2867         char *start, *next, *end;
2868         Py_ssize_t self_len, result_len;
2869         Py_ssize_t count, product;
2870         PyStringObject *result;
2871
2872         self_s = PyString_AS_STRING(self);
2873         self_len = PyString_GET_SIZE(self);
2874
2875         count = countchar(self_s, self_len, from_c, maxcount);
2876         if (count == 0) {
2877                 /* no matches, return unchanged */
2878                 return return_self(self);
2879         }
2880
2881         /* use the difference between current and new, hence the "-1" */
2882         /*   result_len = self_len + count * (to_len-1)  */
2883         product = count * (to_len-1);
2884         if (product / (to_len-1) != count) {
2885                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2886                 return NULL;
2887         }
2888         result_len = self_len + product;
2889         if (result_len < 0) {
2890                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2891                 return NULL;
2892         }
2893
2894         if ( (result = (PyStringObject *)
2895               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2896                 return NULL;
2897         result_s = PyString_AS_STRING(result);
2898
2899         start = self_s;
2900         end = self_s + self_len;
2901         while (count-- > 0) {
2902                 next = findchar(start, end-start, from_c);
2903                 if (next == NULL)
2904                         break;
2905
2906                 if (next == start) {
2907                         /* replace with the 'to' */
2908                         Py_MEMCPY(result_s, to_s, to_len);
2909                         result_s += to_len;
2910                         start += 1;
2911                 } else {
2912                         /* copy the unchanged old then the 'to' */
2913                         Py_MEMCPY(result_s, start, next-start);
2914                         result_s += (next-start);
2915                         Py_MEMCPY(result_s, to_s, to_len);
2916                         result_s += to_len;
2917                         start = next+1;
2918                 }
2919         }
2920         /* Copy the remainder of the remaining string */
2921         Py_MEMCPY(result_s, start, end-start);
2922
2923         return result;
2924 }
2925
/* len(self)>=1, len(from)>=2, len(to)>=1, len(to)!=len(from), maxcount>=1 */
2927 Py_LOCAL(PyStringObject *)
2928 replace_substring(PyStringObject *self,
2929                   const char *from_s, Py_ssize_t from_len,
2930                   const char *to_s, Py_ssize_t to_len,
2931                   Py_ssize_t maxcount) {
2932         char *self_s, *result_s;
2933         char *start, *next, *end;
2934         Py_ssize_t self_len, result_len;
2935         Py_ssize_t count, offset, product;
2936         PyStringObject *result;
2937
2938         self_s = PyString_AS_STRING(self);
2939         self_len = PyString_GET_SIZE(self);
2940
2941         count = countstring(self_s, self_len,
2942                             from_s, from_len,
2943                             0, self_len, FORWARD, maxcount);
2944         if (count == 0) {
2945                 /* no matches, return unchanged */
2946                 return return_self(self);
2947         }
2948
2949         /* Check for overflow */
2950         /*    result_len = self_len + count * (to_len-from_len) */
2951         product = count * (to_len-from_len);
2952         if (product / (to_len-from_len) != count) {
2953                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2954                 return NULL;
2955         }
2956         result_len = self_len + product;
2957         if (result_len < 0) {
2958                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2959                 return NULL;
2960         }
2961
2962         if ( (result = (PyStringObject *)
2963               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2964                 return NULL;
2965         result_s = PyString_AS_STRING(result);
2966
2967         start = self_s;
2968         end = self_s + self_len;
2969         while (count-- > 0) {
2970                 offset = findstring(start, end-start,
2971                                     from_s, from_len,
2972                                     0, end-start, FORWARD);
2973                 if (offset == -1)
2974                         break;
2975                 next = start+offset;
2976                 if (next == start) {
2977                         /* replace with the 'to' */
2978                         Py_MEMCPY(result_s, to_s, to_len);
2979                         result_s += to_len;
2980                         start += from_len;
2981                 } else {
2982                         /* copy the unchanged old then the 'to' */
2983                         Py_MEMCPY(result_s, start, next-start);
2984                         result_s += (next-start);
2985                         Py_MEMCPY(result_s, to_s, to_len);
2986                         result_s += to_len;
2987                         start = next+from_len;
2988                 }
2989         }
2990         /* Copy the remainder of the remaining string */
2991         Py_MEMCPY(result_s, start, end-start);
2992
2993         return result;
2994 }
2995
2996
2997 Py_LOCAL(PyStringObject *)
2998 replace(PyStringObject *self,
2999         const char *from_s, Py_ssize_t from_len,
3000         const char *to_s, Py_ssize_t to_len,
3001         Py_ssize_t maxcount)
3002 {
3003         if (maxcount < 0) {
3004                 maxcount = PY_SSIZE_T_MAX;
3005         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3006                 /* nothing to do; return the original string */
3007                 return return_self(self);
3008         }
3009
3010         if (maxcount == 0 ||
3011             (from_len == 0 && to_len == 0)) {
3012                 /* nothing to do; return the original string */
3013                 return return_self(self);
3014         }
3015
3016         /* Handle zero-length special cases */
3017
3018         if (from_len == 0) {
3019                 /* insert the 'to' string everywhere.   */
3020                 /*    >>> "Python".replace("", ".")     */
3021                 /*    '.P.y.t.h.o.n.'                   */
3022                 return replace_interleave(self, to_s, to_len, maxcount);
3023         }
3024
3025         /* Except for "".replace("", "A") == "A" there is no way beyond this */
3026         /* point for an empty self string to generate a non-empty string */
3027         /* Special case so the remaining code always gets a non-empty string */
3028         if (PyString_GET_SIZE(self) == 0) {
3029                 return return_self(self);
3030         }
3031
3032         if (to_len == 0) {
3033                 /* delete all occurances of 'from' string */
3034                 if (from_len == 1) {
3035                         return replace_delete_single_character(
3036                                 self, from_s[0], maxcount);
3037                 } else {
3038                         return replace_delete_substring(self, from_s, from_len, maxcount);
3039                 }
3040         }
3041
3042         /* Handle special case where both strings have the same length */
3043
3044         if (from_len == to_len) {
3045                 if (from_len == 1) {
3046                         return replace_single_character_in_place(
3047                                 self,
3048                                 from_s[0],
3049                                 to_s[0],
3050                                 maxcount);
3051                 } else {
3052                         return replace_substring_in_place(
3053                                 self, from_s, from_len, to_s, to_len, maxcount);
3054                 }
3055         }
3056
3057         /* Otherwise use the more generic algorithms */
3058         if (from_len == 1) {
3059                 return replace_single_character(self, from_s[0],
3060                                                 to_s, to_len, maxcount);
3061         } else {
3062                 /* len('from')>=2, len('to')>=1 */
3063                 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3064         }
3065 }
3066
3067 PyDoc_STRVAR(replace__doc__,
3068 "S.replace (old, new[, count]) -> string\n\
3069 \n\
3070 Return a copy of string S with all occurrences of substring\n\
3071 old replaced by new.  If the optional argument count is\n\
3072 given, only the first count occurrences are replaced.");
3073
3074 static PyObject *
3075 string_replace(PyStringObject *self, PyObject *args)
3076 {
3077         Py_ssize_t count = -1;
3078         PyObject *from, *to;
3079         const char *from_s, *to_s;
3080         Py_ssize_t from_len, to_len;
3081
3082         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3083                 return NULL;
3084
3085         if (PyString_Check(from)) {
3086                 from_s = PyString_AS_STRING(from);
3087                 from_len = PyString_GET_SIZE(from);
3088         }
3089 #ifdef Py_USING_UNICODE
3090         if (PyUnicode_Check(from))
3091                 return PyUnicode_Replace((PyObject *)self,
3092                                          from, to, count);
3093 #endif
3094         else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3095                 return NULL;
3096
3097         if (PyString_Check(to)) {
3098                 to_s = PyString_AS_STRING(to);
3099                 to_len = PyString_GET_SIZE(to);
3100         }
3101 #ifdef Py_USING_UNICODE
3102         else if (PyUnicode_Check(to))
3103                 return PyUnicode_Replace((PyObject *)self,
3104                                          from, to, count);
3105 #endif
3106         else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3107                 return NULL;
3108
3109         return (PyObject *)replace((PyStringObject *) self,
3110                                    from_s, from_len,
3111                                    to_s, to_len, count);
3112 }
3113
3114 /** End DALKE **/
3115
3116 /* Matches the end (direction >= 0) or start (direction < 0) of self
3117  * against substr, using the start and end arguments. Returns
3118  * -1 on error, 0 if not found and 1 if found.
3119  */
3120 Py_LOCAL(int)
3121 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3122                   Py_ssize_t end, int direction)
3123 {
3124         Py_ssize_t len = PyString_GET_SIZE(self);
3125         Py_ssize_t slen;
3126         const char* sub;
3127         const char* str;
3128
3129         if (PyString_Check(substr)) {
3130                 sub = PyString_AS_STRING(substr);
3131                 slen = PyString_GET_SIZE(substr);
3132         }
3133 #ifdef Py_USING_UNICODE
3134         else if (PyUnicode_Check(substr))
3135                 return PyUnicode_Tailmatch((PyObject *)self,
3136                                            substr, start, end, direction);
3137 #endif
3138         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3139                 return -1;
3140         str = PyString_AS_STRING(self);
3141
3142         string_adjust_indices(&start, &end, len);
3143
3144         if (direction < 0) {
3145                 /* startswith */
3146                 if (start+slen > len)
3147                         return 0;
3148         } else {
3149                 /* endswith */
3150                 if (end-start < slen || start > len)
3151                         return 0;
3152
3153                 if (end-slen > start)
3154                         start = end - slen;
3155         }
3156         if (end-start >= slen)
3157                 return ! memcmp(str+start, sub, slen);
3158         return 0;
3159 }
3160
3161
3162 PyDoc_STRVAR(startswith__doc__,
3163 "S.startswith(prefix[, start[, end]]) -> bool\n\
3164 \n\
3165 Return True if S starts with the specified prefix, False otherwise.\n\
3166 With optional start, test S beginning at that position.\n\
3167 With optional end, stop comparing S at that position.\n\
3168 prefix can also be a tuple of strings to try.");
3169
3170 static PyObject *
3171 string_startswith(PyStringObject *self, PyObject *args)
3172 {
3173         Py_ssize_t start = 0;
3174         Py_ssize_t end = PY_SSIZE_T_MAX;
3175         PyObject *subobj;
3176         int result;
3177
3178         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3179                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3180                 return NULL;
3181         if (PyTuple_Check(subobj)) {
3182                 Py_ssize_t i;
3183                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3184                         result = _string_tailmatch(self,
3185                                         PyTuple_GET_ITEM(subobj, i),
3186                                         start, end, -1);
3187                         if (result == -1)
3188                                 return NULL;
3189                         else if (result) {
3190                                 Py_RETURN_TRUE;
3191                         }
3192                 }
3193                 Py_RETURN_FALSE;
3194         }
3195         result = _string_tailmatch(self, subobj, start, end, -1);
3196         if (result == -1)
3197                 return NULL;
3198         else
3199                 return PyBool_FromLong(result);
3200 }
3201
3202
3203 PyDoc_STRVAR(endswith__doc__,
3204 "S.endswith(suffix[, start[, end]]) -> bool\n\
3205 \n\
3206 Return True if S ends with the specified suffix, False otherwise.\n\
3207 With optional start, test S beginning at that position.\n\
3208 With optional end, stop comparing S at that position.\n\
3209 suffix can also be a tuple of strings to try.");
3210
3211 static PyObject *
3212 string_endswith(PyStringObject *self, PyObject *args)
3213 {
3214         Py_ssize_t start = 0;
3215         Py_ssize_t end = PY_SSIZE_T_MAX;
3216         PyObject *subobj;
3217         int result;
3218
3219         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3220                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3221                 return NULL;
3222         if (PyTuple_Check(subobj)) {
3223                 Py_ssize_t i;
3224                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3225                         result = _string_tailmatch(self,
3226                                         PyTuple_GET_ITEM(subobj, i),
3227                                         start, end, +1);
3228                         if (result == -1)
3229                                 return NULL;
3230                         else if (result) {
3231                                 Py_RETURN_TRUE;
3232                         }
3233                 }
3234                 Py_RETURN_FALSE;
3235         }
3236         result = _string_tailmatch(self, subobj, start, end, +1);
3237         if (result == -1)
3238                 return NULL;
3239         else
3240                 return PyBool_FromLong(result);
3241 }
3242
3243
3244 PyDoc_STRVAR(encode__doc__,
3245 "S.encode([encoding[,errors]]) -> object\n\
3246 \n\
3247 Encodes S using the codec registered for encoding. encoding defaults\n\
3248 to the default encoding. errors may be given to set a different error\n\
3249 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3250 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3251 'xmlcharrefreplace' as well as any other name registered with\n\
3252 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3253
3254 static PyObject *
3255 string_encode(PyStringObject *self, PyObject *args)
3256 {
3257     char *encoding = NULL;
3258     char *errors = NULL;
3259     PyObject *v;
3260
3261     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3262         return NULL;
3263     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3264     if (v == NULL)
3265         goto onError;
3266     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3267         PyErr_Format(PyExc_TypeError,
3268                      "encoder did not return a string/unicode object "
3269                      "(type=%.400s)",
3270                      Py_Type(v)->tp_name);
3271         Py_DECREF(v);
3272         return NULL;
3273     }
3274     return v;
3275
3276  onError:
3277     return NULL;
3278 }
3279
3280
3281 PyDoc_STRVAR(decode__doc__,
3282 "S.decode([encoding[,errors]]) -> object\n\
3283 \n\
3284 Decodes S using the codec registered for encoding. encoding defaults\n\
3285 to the default encoding. errors may be given to set a different error\n\
3286 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3287 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3288 as well as any other name registerd with codecs.register_error that is\n\
3289 able to handle UnicodeDecodeErrors.");
3290
3291 static PyObject *
3292 string_decode(PyStringObject *self, PyObject *args)
3293 {
3294     char *encoding = NULL;
3295     char *errors = NULL;
3296     PyObject *v;
3297
3298     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3299         return NULL;
3300     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3301     if (v == NULL)
3302         goto onError;
3303     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3304         PyErr_Format(PyExc_TypeError,
3305                      "decoder did not return a string/unicode object "
3306                      "(type=%.400s)",
3307                      Py_Type(v)->tp_name);
3308         Py_DECREF(v);
3309         return NULL;
3310     }
3311     return v;
3312
3313  onError:
3314     return NULL;
3315 }
3316
3317
3318 PyDoc_STRVAR(expandtabs__doc__,
3319 "S.expandtabs([tabsize]) -> string\n\
3320 \n\
3321 Return a copy of S where all tab characters are expanded using spaces.\n\
3322 If tabsize is not given, a tab size of 8 characters is assumed.");
3323
3324 static PyObject*
3325 string_expandtabs(PyStringObject *self, PyObject *args)
3326 {
3327     const char *e, *p;
3328     char *q;
3329     Py_ssize_t i, j, old_j;
3330     PyObject *u;
3331     int tabsize = 8;
3332
3333     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3334         return NULL;
3335
3336     /* First pass: determine size of output string */
3337     i = j = old_j = 0;
3338     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3339     for (p = PyString_AS_STRING(self); p < e; p++)
3340         if (*p == '\t') {
3341             if (tabsize > 0) {
3342                 j += tabsize - (j % tabsize);
3343                 if (old_j > j) {
3344                     PyErr_SetString(PyExc_OverflowError,
3345                                     "new string is too long");
3346                     return NULL;
3347                 }
3348                 old_j = j;
3349             }
3350         }
3351         else {
3352             j++;
3353             if (*p == '\n' || *p == '\r') {
3354                 i += j;
3355                 old_j = j = 0;
3356                 if (i < 0) {
3357                     PyErr_SetString(PyExc_OverflowError,
3358                                     "new string is too long");
3359                     return NULL;
3360                 }
3361             }
3362         }
3363
3364     if ((i + j) < 0) {
3365         PyErr_SetString(PyExc_OverflowError, "new string is too long");
3366         return NULL;
3367     }
3368
3369     /* Second pass: create output string and fill it */
3370     u = PyString_FromStringAndSize(NULL, i + j);
3371     if (!u)
3372         return NULL;
3373
3374     j = 0;
3375     q = PyString_AS_STRING(u);
3376
3377     for (p = PyString_AS_STRING(self); p < e; p++)
3378         if (*p == '\t') {
3379             if (tabsize > 0) {
3380                 i = tabsize - (j % tabsize);
3381                 j += i;
3382                 while (i--)
3383                     *q++ = ' ';
3384             }
3385         }
3386         else {
3387             j++;
3388             *q++ = *p;
3389             if (*p == '\n' || *p == '\r')
3390                 j = 0;
3391         }
3392
3393     return u;
3394 }
3395
3396 Py_LOCAL_INLINE(PyObject *)
3397 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3398 {
3399     PyObject *u;
3400
3401     if (left < 0)
3402         left = 0;
3403     if (right < 0)
3404         right = 0;
3405
3406     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3407         Py_INCREF(self);
3408         return (PyObject *)self;
3409     }
3410
3411     u = PyString_FromStringAndSize(NULL,
3412                                    left + PyString_GET_SIZE(self) + right);
3413     if (u) {
3414         if (left)
3415             memset(PyString_AS_STRING(u), fill, left);
3416         Py_MEMCPY(PyString_AS_STRING(u) + left,
3417                PyString_AS_STRING(self),
3418                PyString_GET_SIZE(self));
3419         if (right)
3420             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3421                    fill, right);
3422     }
3423
3424     return u;
3425 }
3426
3427 PyDoc_STRVAR(ljust__doc__,
3428 "S.ljust(width[, fillchar]) -> string\n"
3429 "\n"
3430 "Return S left justified in a string of length width. Padding is\n"
3431 "done using the specified fill character (default is a space).");
3432
3433 static PyObject *
3434 string_ljust(PyStringObject *self, PyObject *args)
3435 {
3436     Py_ssize_t width;
3437     char fillchar = ' ';
3438
3439     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3440         return NULL;
3441
3442     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3443         Py_INCREF(self);
3444         return (PyObject*) self;
3445     }
3446
3447     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3448 }
3449
3450
3451 PyDoc_STRVAR(rjust__doc__,
3452 "S.rjust(width[, fillchar]) -> string\n"
3453 "\n"
3454 "Return S right justified in a string of length width. Padding is\n"
3455 "done using the specified fill character (default is a space)");
3456
3457 static PyObject *
3458 string_rjust(PyStringObject *self, PyObject *args)
3459 {
3460     Py_ssize_t width;
3461     char fillchar = ' ';
3462
3463     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3464         return NULL;
3465
3466     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3467         Py_INCREF(self);
3468         return (PyObject*) self;
3469     }
3470
3471     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3472 }
3473
3474
3475 PyDoc_STRVAR(center__doc__,
3476 "S.center(width[, fillchar]) -> string\n"
3477 "\n"
3478 "Return S centered in a string of length width. Padding is\n"
3479 "done using the specified fill character (default is a space)");
3480
3481 static PyObject *
3482 string_center(PyStringObject *self, PyObject *args)
3483 {
3484     Py_ssize_t marg, left;
3485     Py_ssize_t width;
3486     char fillchar = ' ';
3487
3488     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3489         return NULL;
3490
3491     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3492         Py_INCREF(self);
3493         return (PyObject*) self;
3494     }
3495
3496     marg = width - PyString_GET_SIZE(self);
3497     left = marg / 2 + (marg & width & 1);
3498
3499     return pad(self, left, marg - left, fillchar);
3500 }
3501
3502 PyDoc_STRVAR(zfill__doc__,
3503 "S.zfill(width) -> string\n"
3504 "\n"
3505 "Pad a numeric string S with zeros on the left, to fill a field\n"
3506 "of the specified width.  The string S is never truncated.");
3507
3508 static PyObject *
3509 string_zfill(PyStringObject *self, PyObject *args)
3510 {
3511     Py_ssize_t fill;
3512     PyObject *s;
3513     char *p;
3514     Py_ssize_t width;
3515
3516     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3517         return NULL;
3518
3519     if (PyString_GET_SIZE(self) >= width) {
3520         if (PyString_CheckExact(self)) {
3521             Py_INCREF(self);
3522             return (PyObject*) self;
3523         }
3524         else
3525             return PyString_FromStringAndSize(
3526                 PyString_AS_STRING(self),
3527                 PyString_GET_SIZE(self)
3528             );
3529     }
3530
3531     fill = width - PyString_GET_SIZE(self);
3532
3533     s = pad(self, fill, 0, '0');
3534
3535     if (s == NULL)
3536         return NULL;
3537
3538     p = PyString_AS_STRING(s);
3539     if (p[fill] == '+' || p[fill] == '-') {
3540         /* move sign to beginning of string */
3541         p[0] = p[fill];
3542         p[fill] = '0';
3543     }
3544
3545     return (PyObject*) s;
3546 }
3547
3548 PyDoc_STRVAR(isspace__doc__,
3549 "S.isspace() -> bool\n\
3550 \n\
3551 Return True if all characters in S are whitespace\n\
3552 and there is at least one character in S, False otherwise.");
3553
3554 static PyObject*
3555 string_isspace(PyStringObject *self)
3556 {
3557     register const unsigned char *p
3558         = (unsigned char *) PyString_AS_STRING(self);
3559     register const unsigned char *e;
3560
3561     /* Shortcut for single character strings */
3562     if (PyString_GET_SIZE(self) == 1 &&
3563         isspace(*p))
3564         return PyBool_FromLong(1);
3565
3566     /* Special case for empty strings */
3567     if (PyString_GET_SIZE(self) == 0)
3568         return PyBool_FromLong(0);
3569
3570     e = p + PyString_GET_SIZE(self);
3571     for (; p < e; p++) {
3572         if (!isspace(*p))
3573             return PyBool_FromLong(0);
3574     }
3575     return PyBool_FromLong(1);
3576 }
3577
3578
3579 PyDoc_STRVAR(isalpha__doc__,
3580 "S.isalpha() -> bool\n\
3581 \n\
3582 Return True if all characters in S are alphabetic\n\
3583 and there is at least one character in S, False otherwise.");
3584
3585 static PyObject*
3586 string_isalpha(PyStringObject *self)
3587 {
3588     register const unsigned char *p
3589         = (unsigned char *) PyString_AS_STRING(self);
3590     register const unsigned char *e;
3591
3592     /* Shortcut for single character strings */
3593     if (PyString_GET_SIZE(self) == 1 &&
3594         isalpha(*p))
3595         return PyBool_FromLong(1);
3596
3597     /* Special case for empty strings */
3598     if (PyString_GET_SIZE(self) == 0)
3599         return PyBool_FromLong(0);
3600
3601     e = p + PyString_GET_SIZE(self);
3602     for (; p < e; p++) {
3603         if (!isalpha(*p))
3604             return PyBool_FromLong(0);
3605     }
3606     return PyBool_FromLong(1);
3607 }
3608
3609
3610 PyDoc_STRVAR(isalnum__doc__,
3611 "S.isalnum() -> bool\n\
3612 \n\
3613 Return True if all characters in S are alphanumeric\n\
3614 and there is at least one character in S, False otherwise.");
3615
3616 static PyObject*
3617 string_isalnum(PyStringObject *self)
3618 {
3619     register const unsigned char *p
3620         = (unsigned char *) PyString_AS_STRING(self);
3621     register const unsigned char *e;
3622
3623     /* Shortcut for single character strings */
3624     if (PyString_GET_SIZE(self) == 1 &&
3625         isalnum(*p))
3626         return PyBool_FromLong(1);
3627
3628     /* Special case for empty strings */
3629     if (PyString_GET_SIZE(self) == 0)
3630         return PyBool_FromLong(0);
3631
3632     e = p + PyString_GET_SIZE(self);
3633     for (; p < e; p++) {
3634         if (!isalnum(*p))
3635             return PyBool_FromLong(0);
3636     }
3637     return PyBool_FromLong(1);
3638 }
3639
3640
3641 PyDoc_STRVAR(isdigit__doc__,
3642 "S.isdigit() -> bool\n\
3643 \n\
3644 Return True if all characters in S are digits\n\
3645 and there is at least one character in S, False otherwise.");
3646
3647 static PyObject*
3648 string_isdigit(PyStringObject *self)
3649 {
3650     register const unsigned char *p
3651         = (unsigned char *) PyString_AS_STRING(self);
3652     register const unsigned char *e;
3653
3654     /* Shortcut for single character strings */
3655     if (PyString_GET_SIZE(self) == 1 &&
3656         isdigit(*p))
3657         return PyBool_FromLong(1);
3658
3659     /* Special case for empty strings */
3660     if (PyString_GET_SIZE(self) == 0)
3661         return PyBool_FromLong(0);
3662
3663     e = p + PyString_GET_SIZE(self);
3664     for (; p < e; p++) {
3665         if (!isdigit(*p))
3666             return PyBool_FromLong(0);
3667     }
3668     return PyBool_FromLong(1);
3669 }
3670
3671
3672 PyDoc_STRVAR(islower__doc__,
3673 "S.islower() -> bool\n\
3674 \n\
3675 Return True if all cased characters in S are lowercase and there is\n\
3676 at least one cased character in S, False otherwise.");
3677
3678 static PyObject*
3679 string_islower(PyStringObject *self)
3680 {
3681     register const unsigned char *p
3682         = (unsigned char *) PyString_AS_STRING(self);
3683     register const unsigned char *e;
3684     int cased;
3685
3686     /* Shortcut for single character strings */
3687     if (PyString_GET_SIZE(self) == 1)
3688         return PyBool_FromLong(islower(*p) != 0);
3689
3690     /* Special case for empty strings */
3691     if (PyString_GET_SIZE(self) == 0)
3692         return PyBool_FromLong(0);
3693
3694     e = p + PyString_GET_SIZE(self);
3695     cased = 0;
3696     for (; p < e; p++) {
3697         if (isupper(*p))
3698             return PyBool_FromLong(0);
3699         else if (!cased && islower(*p))
3700             cased = 1;
3701     }
3702     return PyBool_FromLong(cased);
3703 }
3704
3705
3706 PyDoc_STRVAR(isupper__doc__,
3707 "S.isupper() -> bool\n\
3708 \n\
3709 Return True if all cased characters in S are uppercase and there is\n\
3710 at least one cased character in S, False otherwise.");
3711
3712 static PyObject*
3713 string_isupper(PyStringObject *self)
3714 {
3715     register const unsigned char *p
3716         = (unsigned char *) PyString_AS_STRING(self);
3717     register const unsigned char *e;
3718     int cased;
3719
3720     /* Shortcut for single character strings */
3721     if (PyString_GET_SIZE(self) == 1)
3722         return PyBool_FromLong(isupper(*p) != 0);
3723
3724     /* Special case for empty strings */
3725     if (PyString_GET_SIZE(self) == 0)
3726         return PyBool_FromLong(0);
3727
3728     e = p + PyString_GET_SIZE(self);
3729     cased = 0;
3730     for (; p < e; p++) {
3731         if (islower(*p))
3732             return PyBool_FromLong(0);
3733         else if (!cased && isupper(*p))
3734             cased = 1;
3735     }
3736     return PyBool_FromLong(cased);
3737 }
3738
3739
3740 PyDoc_STRVAR(istitle__doc__,
3741 "S.istitle() -> bool\n\
3742 \n\
3743 Return True if S is a titlecased string and there is at least one\n\
3744 character in S, i.e. uppercase characters may only follow uncased\n\
3745 characters and lowercase characters only cased ones. Return False\n\
3746 otherwise.");
3747
3748 static PyObject*
3749 string_istitle(PyStringObject *self, PyObject *uncased)
3750 {
3751     register const unsigned char *p
3752         = (unsigned char *) PyString_AS_STRING(self);
3753     register const unsigned char *e;
3754     int cased, previous_is_cased;
3755
3756     /* Shortcut for single character strings */
3757     if (PyString_GET_SIZE(self) == 1)
3758         return PyBool_FromLong(isupper(*p) != 0);
3759
3760     /* Special case for empty strings */
3761     if (PyString_GET_SIZE(self) == 0)
3762         return PyBool_FromLong(0);
3763
3764     e = p + PyString_GET_SIZE(self);
3765     cased = 0;
3766     previous_is_cased = 0;
3767     for (; p < e; p++) {
3768         register const unsigned char ch = *p;
3769
3770         if (isupper(ch)) {
3771             if (previous_is_cased)
3772                 return PyBool_FromLong(0);
3773             previous_is_cased = 1;
3774             cased = 1;
3775         }
3776         else if (islower(ch)) {
3777             if (!previous_is_cased)
3778                 return PyBool_FromLong(0);
3779             previous_is_cased = 1;
3780             cased = 1;
3781         }
3782         else
3783             previous_is_cased = 0;
3784     }
3785     return PyBool_FromLong(cased);
3786 }
3787
3788
3789 PyDoc_STRVAR(splitlines__doc__,
3790 "S.splitlines([keepends]) -> list of strings\n\
3791 \n\
3792 Return a list of the lines in S, breaking at line boundaries.\n\
3793 Line breaks are not included in the resulting list unless keepends\n\
3794 is given and true.");
3795
3796 static PyObject*
3797 string_splitlines(PyStringObject *self, PyObject *args)
3798 {
3799     register Py_ssize_t i;
3800     register Py_ssize_t j;
3801     Py_ssize_t len;
3802     int keepends = 0;
3803     PyObject *list;
3804     PyObject *str;
3805     char *data;
3806
3807     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3808         return NULL;
3809
3810     data = PyString_AS_STRING(self);
3811     len = PyString_GET_SIZE(self);
3812
3813     /* This does not use the preallocated list because splitlines is
3814        usually run with hundreds of newlines.  The overhead of
3815        switching between PyList_SET_ITEM and append causes about a
3816        2-3% slowdown for that common case.  A smarter implementation
3817        could move the if check out, so the SET_ITEMs are done first
3818        and the appends only done when the prealloc buffer is full.
3819        That's too much work for little gain.*/
3820
3821     list = PyList_New(0);
3822     if (!list)
3823         goto onError;
3824
3825     for (i = j = 0; i < len; ) {
3826         Py_ssize_t eol;
3827
3828         /* Find a line and append it */
3829         while (i < len && data[i] != '\n' && data[i] != '\r')
3830             i++;
3831
3832         /* Skip the line break reading CRLF as one line break */
3833         eol = i;
3834         if (i < len) {
3835             if (data[i] == '\r' && i + 1 < len &&
3836                 data[i+1] == '\n')
3837                 i += 2;
3838             else
3839                 i++;
3840             if (keepends)
3841                 eol = i;
3842         }
3843         SPLIT_APPEND(data, j, eol);
3844         j = i;
3845     }
3846     if (j < len) {
3847         SPLIT_APPEND(data, j, len);
3848     }
3849
3850     return list;
3851
3852  onError:
3853     Py_XDECREF(list);
3854     return NULL;
3855 }
3856
3857 #undef SPLIT_APPEND
3858 #undef SPLIT_ADD
3859 #undef MAX_PREALLOC
3860 #undef PREALLOC_SIZE
3861
3862 static PyObject *
3863 string_getnewargs(PyStringObject *v)
3864 {
3865         return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
3866 }
3867
3868 \f
/* Method table installed as tp_methods of PyString_Type.  Each entry is
   {name, C function, calling-convention flags, docstring}. */
static PyMethodDef
string_methods[] = {
        /* Counterparts of the obsolete stropmodule functions; except
           string.maketrans(). */
        {"join", (PyCFunction)string_join, METH_O, join__doc__},
        {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
        {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
        {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
        {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
        {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
        {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
        {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
        {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
        {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
        {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
        {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
        {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
         capitalize__doc__},
        {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
        {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
         endswith__doc__},
        {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
        {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
        {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
        {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
        {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
        {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
        {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
        {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
        {"rpartition", (PyCFunction)string_rpartition, METH_O,
         rpartition__doc__},
        {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
         startswith__doc__},
        {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
        {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
         swapcase__doc__},
        {"translate", (PyCFunction)string_translate, METH_VARARGS,
         translate__doc__},
        {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
        {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
        {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
        {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
        {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
        {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
        {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
        {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
         expandtabs__doc__},
        {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
         splitlines__doc__},
        /* No docstring is supplied for this entry. */
        {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
        {NULL,     NULL}                     /* sentinel */
};
3921
3922 static PyObject *
3923 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3924
3925 static PyObject *
3926 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3927 {
3928         PyObject *x = NULL;
3929         static char *kwlist[] = {"object", 0};
3930
3931         if (type != &PyString_Type)
3932                 return str_subtype_new(type, args, kwds);
3933         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3934                 return NULL;
3935         if (x == NULL)
3936                 return PyString_FromString("");
3937         return PyObject_Str(x);
3938 }
3939
3940 static PyObject *
3941 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3942 {
3943         PyObject *tmp, *pnew;
3944         Py_ssize_t n;
3945
3946         assert(PyType_IsSubtype(type, &PyString_Type));
3947         tmp = string_new(&PyString_Type, args, kwds);
3948         if (tmp == NULL)
3949                 return NULL;
3950         assert(PyString_CheckExact(tmp));
3951         n = PyString_GET_SIZE(tmp);
3952         pnew = type->tp_alloc(type, n);
3953         if (pnew != NULL) {
3954                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3955                 ((PyStringObject *)pnew)->ob_shash =
3956                         ((PyStringObject *)tmp)->ob_shash;
3957                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3958         }
3959         Py_DECREF(tmp);
3960         return pnew;
3961 }
3962
3963 static PyObject *
3964 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3965 {
3966         PyErr_SetString(PyExc_TypeError,
3967                         "The basestring type cannot be instantiated");
3968         return NULL;
3969 }
3970
3971 static PyObject *
3972 string_mod(PyObject *v, PyObject *w)
3973 {
3974         if (!PyString_Check(v)) {
3975                 Py_INCREF(Py_NotImplemented);
3976                 return Py_NotImplemented;
3977         }
3978         return PyString_Format(v, w);
3979 }
3980
3981 PyDoc_STRVAR(basestring_doc,
3982 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3983
/* Number protocol table for str.  Only nb_remainder is set (to
   string_mod); the initializer stops there, so all later slots are
   zero-initialized. */
static PyNumberMethods string_as_number = {
        0,                      /*nb_add*/
        0,                      /*nb_subtract*/
        0,                      /*nb_multiply*/
        0,                      /*nb_divide*/
        string_mod,             /*nb_remainder*/
};
3991
3992
/* Type object for `basestring`.  It derives from PyBaseObject_Type, allows
   subclassing (Py_TPFLAGS_BASETYPE), and its tp_new (basestring_new)
   always raises, so it cannot be instantiated directly. */
PyTypeObject PyBaseString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "basestring",                           /* tp_name */
        0,                                      /* tp_basicsize */
        0,                                      /* tp_itemsize */
        0,                                      /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        0,                                      /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        0,                                      /* tp_getattro */
        0,                                      /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        basestring_doc,                         /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        0,                                      /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseObject_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        basestring_new,                         /* tp_new */
        0,                                      /* tp_free */
};
4034
4035 PyDoc_STRVAR(string_doc,
4036 "str(object) -> string\n\
4037 \n\
4038 Return a nice string representation of the object.\n\
4039 If the argument is a string, the return value is the same object.");
4040
/* Type object for `str`.  It derives from PyBaseString_Type and is a
   variable-size type: sizeof(PyStringObject) for the fixed part plus one
   char per item.  Instances are created by string_new and released via
   string_dealloc / PyObject_Del. */
PyTypeObject PyString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "str",                                  /* tp_name */
        sizeof(PyStringObject),                 /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        string_dealloc,                         /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        string_repr,                            /* tp_repr */
        &string_as_number,                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        &string_as_mapping,                     /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        string_str,                             /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
                Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS,               /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseString_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        PyObject_Del,                           /* tp_free */
};
4083
4084 void
4085 PyString_Concat(register PyObject **pv, register PyObject *w)
4086 {
4087         register PyObject *v;
4088         if (*pv == NULL)
4089                 return;
4090         if (w == NULL || !PyString_Check(*pv)) {
4091                 Py_DECREF(*pv);
4092                 *pv = NULL;
4093                 return;
4094         }
4095         v = string_concat((PyStringObject *) *pv, w);
4096         Py_DECREF(*pv);
4097         *pv = v;
4098 }
4099
4100 void
4101 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4102 {
4103         PyString_Concat(pv, w);
4104         Py_XDECREF(w);
4105 }
4106
4107
4108 /* The following function breaks the notion that strings are immutable:
4109    it changes the size of a string.  We get away with this only if there
4110    is only one module referencing the object.  You can also think of it
4111    as creating a new string object and destroying the old one, only
4112    more efficiently.  In any case, don't use this if the string may
4113    already be known to some other part of the code...
4114    Note that if there's not enough memory to resize the string, the original
4115    string object at *pv is deallocated, *pv is set to NULL, an "out of
4116    memory" exception is set, and -1 is returned.  Else (on success) 0 is
4117    returned, and the value in *pv may or may not be the same as on input.
4118    As always, an extra byte is allocated for a trailing \0 byte (newsize
4119    does *not* include that), and a trailing \0 byte is stored.
4120 */
4121
4122 int
4123 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4124 {
4125         register PyObject *v;
4126         register PyStringObject *sv;
4127         v = *pv;
4128         if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
4129             PyString_CHECK_INTERNED(v)) {
4130                 *pv = 0;
4131                 Py_DECREF(v);
4132                 PyErr_BadInternalCall();
4133                 return -1;
4134         }
4135         /* XXX UNREF/NEWREF interface should be more symmetrical */
4136         _Py_DEC_REFTOTAL;
4137         _Py_ForgetReference(v);
4138         *pv = (PyObject *)
4139                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4140         if (*pv == NULL) {
4141                 PyObject_Del(v);
4142                 PyErr_NoMemory();
4143                 return -1;
4144         }
4145         _Py_NewReference(*pv);
4146         sv = (PyStringObject *) *pv;
4147         Py_Size(sv) = newsize;
4148         sv->ob_sval[newsize] = '\0';
4149         sv->ob_shash = -1;      /* invalidate cached hash value */
4150         return 0;
4151 }
4152
4153 /* Helpers for formatstring */
4154
4155 Py_LOCAL_INLINE(PyObject *)
4156 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4157 {
4158         Py_ssize_t argidx = *p_argidx;
4159         if (argidx < arglen) {
4160                 (*p_argidx)++;
4161                 if (arglen < 0)
4162                         return args;
4163                 else
4164                         return PyTuple_GetItem(args, argidx);
4165         }
4166         PyErr_SetString(PyExc_TypeError,
4167                         "not enough arguments for format string");
4168         return NULL;
4169 }
4170
4171 /* Format codes
4172  * F_LJUST      '-'
4173  * F_SIGN       '+'
4174  * F_BLANK      ' '
4175  * F_ALT        '#'
4176  * F_ZERO       '0'
4177  */
4178 #define F_LJUST (1<<0)
4179 #define F_SIGN  (1<<1)
4180 #define F_BLANK (1<<2)
4181 #define F_ALT   (1<<3)
4182 #define F_ZERO  (1<<4)
4183
4184 Py_LOCAL_INLINE(int)
4185 formatfloat(char *buf, size_t buflen, int flags,
4186             int prec, int type, PyObject *v)
4187 {
4188         /* fmt = '%#.' + `prec` + `type`
4189            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4190         char fmt[20];
4191         double x;
4192         x = PyFloat_AsDouble(v);
4193         if (x == -1.0 && PyErr_Occurred()) {
4194                 PyErr_Format(PyExc_TypeError, "float argument required, "
4195                              "not %.200s", Py_Type(v)->tp_name);
4196                 return -1;
4197         }
4198         if (prec < 0)
4199                 prec = 6;
4200         if (type == 'f' && fabs(x)/1e25 >= 1e25)
4201                 type = 'g';
4202         /* Worst case length calc to ensure no buffer overrun:
4203
4204            'g' formats:
4205              fmt = %#.<prec>g
4206              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4207                 for any double rep.)
4208              len = 1 + prec + 1 + 2 + 5 = 9 + prec
4209
4210            'f' formats:
4211              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4212              len = 1 + 50 + 1 + prec = 52 + prec
4213
4214            If prec=0 the effective precision is 1 (the leading digit is
4215            always given), therefore increase the length by one.
4216
4217         */
4218         if (((type == 'g' || type == 'G') &&
4219               buflen <= (size_t)10 + (size_t)prec) ||
4220             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4221                 PyErr_SetString(PyExc_OverflowError,
4222                         "formatted float is too long (precision too large?)");
4223                 return -1;
4224         }
4225         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4226                       (flags&F_ALT) ? "#" : "",
4227                       prec, type);
4228         PyOS_ascii_formatd(buf, buflen, fmt, x);
4229         return (int)strlen(buf);
4230 }
4231
4232 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4233  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4234  * Python's regular ints.
4235  * Return value:  a new PyString*, or NULL if error.
4236  *  .  *pbuf is set to point into it,
4237  *     *plen set to the # of chars following that.
4238  *     Caller must decref it when done using pbuf.
4239  *     The string starting at *pbuf is of the form
4240  *         "-"? ("0x" | "0X")? digit+
4241  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4242  *         set in flags.  The case of hex digits will be correct,
4243  *     There will be at least prec digits, zero-filled on the left if
4244  *         necessary to get that many.
4245  * val          object to be converted
4246  * flags        bitmask of format flags; only F_ALT is looked at
4247  * prec         minimum number of digits; 0-fill on left if needed
4248  * type         a character in [duoxX]; u acts the same as d
4249  *
4250  * CAUTION:  o, x and X conversions on regular ints can never
4251  * produce a '-' sign, but can for Python's unbounded ints.
4252  */
4253 PyObject*
4254 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4255                      char **pbuf, int *plen)
4256 {
4257         PyObject *result = NULL;
4258         char *buf;
4259         Py_ssize_t i;
4260         int sign;       /* 1 if '-', else 0 */
4261         int len;        /* number of characters */
4262         Py_ssize_t llen;
4263         int numdigits;  /* len == numnondigits + numdigits */
4264         int numnondigits = 0;
4265
4266         switch (type) {
4267         case 'd':
4268         case 'u':
4269                 result = Py_Type(val)->tp_str(val);
4270                 break;
4271         case 'o':
4272                 result = Py_Type(val)->tp_as_number->nb_oct(val);
4273                 break;
4274         case 'x':
4275         case 'X':
4276                 numnondigits = 2;
4277                 result = Py_Type(val)->tp_as_number->nb_hex(val);
4278                 break;
4279         default:
4280                 assert(!"'type' not in [duoxX]");
4281         }
4282         if (!result)
4283                 return NULL;
4284
4285         buf = PyString_AsString(result);
4286         if (!buf) {
4287                 Py_DECREF(result);
4288                 return NULL;
4289         }
4290
4291         /* To modify the string in-place, there can only be one reference. */
4292         if (Py_Refcnt(result) != 1) {
4293                 PyErr_BadInternalCall();
4294                 return NULL;
4295         }
4296         llen = PyString_Size(result);
4297         if (llen > INT_MAX) {
4298                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4299                 return NULL;
4300         }
4301         len = (int)llen;
4302         if (buf[len-1] == 'L') {
4303                 --len;
4304                 buf[len] = '\0';
4305         }
4306         sign = buf[0] == '-';
4307         numnondigits += sign;
4308         numdigits = len - numnondigits;
4309         assert(numdigits > 0);
4310
4311         /* Get rid of base marker unless F_ALT */
4312         if ((flags & F_ALT) == 0) {
4313                 /* Need to skip 0x, 0X or 0. */
4314                 int skipped = 0;
4315                 switch (type) {
4316                 case 'o':
4317                         assert(buf[sign] == '0');
4318                         /* If 0 is only digit, leave it alone. */
4319                         if (numdigits > 1) {
4320                                 skipped = 1;
4321                                 --numdigits;
4322                         }
4323                         break;
4324                 case 'x':
4325                 case 'X':
4326                         assert(buf[sign] == '0');
4327                         assert(buf[sign + 1] == 'x');
4328                         skipped = 2;
4329                         numnondigits -= 2;
4330                         break;
4331                 }
4332                 if (skipped) {
4333                         buf += skipped;
4334                         len -= skipped;
4335                         if (sign)
4336                                 buf[0] = '-';
4337                 }
4338                 assert(len == numnondigits + numdigits);
4339                 assert(numdigits > 0);
4340         }
4341
4342         /* Fill with leading zeroes to meet minimum width. */
4343         if (prec > numdigits) {
4344                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4345                                         numnondigits + prec);
4346                 char *b1;
4347                 if (!r1) {
4348                         Py_DECREF(result);
4349                         return NULL;
4350                 }
4351                 b1 = PyString_AS_STRING(r1);
4352                 for (i = 0; i < numnondigits; ++i)
4353                         *b1++ = *buf++;
4354                 for (i = 0; i < prec - numdigits; i++)
4355                         *b1++ = '0';
4356                 for (i = 0; i < numdigits; i++)
4357                         *b1++ = *buf++;
4358                 *b1 = '\0';
4359                 Py_DECREF(result);
4360                 result = r1;
4361                 buf = PyString_AS_STRING(result);
4362                 len = numnondigits + prec;
4363         }
4364
4365         /* Fix up case for hex conversions. */
4366         if (type == 'X') {
4367                 /* Need to convert all lower case letters to upper case.
4368                    and need to convert 0x to 0X (and -0x to -0X). */
4369                 for (i = 0; i < len; i++)
4370                         if (buf[i] >= 'a' && buf[i] <= 'x')
4371                                 buf[i] -= 'a'-'A';
4372         }
4373         *pbuf = buf;
4374         *plen = len;
4375         return result;
4376 }
4377
4378 Py_LOCAL_INLINE(int)
4379 formatint(char *buf, size_t buflen, int flags,
4380           int prec, int type, PyObject *v)
4381 {
4382         /* fmt = '%#.' + `prec` + 'l' + `type`
4383            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4384            + 1 + 1 = 24 */
4385         char fmt[64];   /* plenty big enough! */
4386         char *sign;
4387         long x;
4388
4389         x = PyInt_AsLong(v);
4390         if (x == -1 && PyErr_Occurred()) {
4391                 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4392                              Py_Type(v)->tp_name);
4393                 return -1;
4394         }
4395         if (x < 0 && type == 'u') {
4396                 type = 'd';
4397         }
4398         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4399                 sign = "-";
4400         else
4401                 sign = "";
4402         if (prec < 0)
4403                 prec = 1;
4404
4405         if ((flags & F_ALT) &&
4406             (type == 'x' || type == 'X')) {
4407                 /* When converting under %#x or %#X, there are a number
4408                  * of issues that cause pain:
4409                  * - when 0 is being converted, the C standard leaves off
4410                  *   the '0x' or '0X', which is inconsistent with other
4411                  *   %#x/%#X conversions and inconsistent with Python's
4412                  *   hex() function
4413                  * - there are platforms that violate the standard and
4414                  *   convert 0 with the '0x' or '0X'
4415                  *   (Metrowerks, Compaq Tru64)
4416                  * - there are platforms that give '0x' when converting
4417                  *   under %#X, but convert 0 in accordance with the
4418                  *   standard (OS/2 EMX)
4419                  *
4420                  * We can achieve the desired consistency by inserting our
4421                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4422                  * of %#x/%#X.
4423                  *
4424                  * Note that this is the same approach as used in
4425                  * formatint() in unicodeobject.c
4426                  */
4427                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4428                               sign, type, prec, type);
4429         }
4430         else {
4431                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4432                               sign, (flags&F_ALT) ? "#" : "",
4433                               prec, type);
4434         }
4435
4436         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4437          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4438          */
4439         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4440                 PyErr_SetString(PyExc_OverflowError,
4441                     "formatted integer is too long (precision too large?)");
4442                 return -1;
4443         }
4444         if (sign[0])
4445                 PyOS_snprintf(buf, buflen, fmt, -x);
4446         else
4447                 PyOS_snprintf(buf, buflen, fmt, x);
4448         return (int)strlen(buf);
4449 }
4450
4451 Py_LOCAL_INLINE(int)
4452 formatchar(char *buf, size_t buflen, PyObject *v)
4453 {
4454         /* presume that the buffer is at least 2 characters long */
4455         if (PyString_Check(v)) {
4456                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4457                         return -1;
4458         }
4459         else {
4460                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4461                         return -1;
4462         }
4463         buf[1] = '\0';
4464         return 1;
4465 }
4466
4467 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4468
4469    FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4470    chars are formatted. XXX This is a magic number. formatfloat() and
4471    formatint() compare the precision against the buffer length and raise
4472    OverflowError; formatchar() only presumes room for two bytes. A better
4473    solution may be to malloc a buffer of appropriate size for each format.
4474 */
4475 #define FORMATBUFLEN (size_t)120
4476
4477 PyObject *
4478 PyString_Format(PyObject *format, PyObject *args)
4479 {
4480         char *fmt, *res;
4481         Py_ssize_t arglen, argidx;
4482         Py_ssize_t reslen, rescnt, fmtcnt;
4483         int args_owned = 0;
4484         PyObject *result, *orig_args;
4485 #ifdef Py_USING_UNICODE
4486         PyObject *v, *w;
4487 #endif
4488         PyObject *dict = NULL;
4489         if (format == NULL || !PyString_Check(format) || args == NULL) {
4490                 PyErr_BadInternalCall();
4491                 return NULL;
4492         }
4493         orig_args = args;
4494         fmt = PyString_AS_STRING(format);
4495         fmtcnt = PyString_GET_SIZE(format);
4496         reslen = rescnt = fmtcnt + 100;
4497         result = PyString_FromStringAndSize((char *)NULL, reslen);
4498         if (result == NULL)
4499                 return NULL;
4500         res = PyString_AsString(result);
4501         if (PyTuple_Check(args)) {
4502                 arglen = PyTuple_GET_SIZE(args);
4503                 argidx = 0;
4504         }
4505         else {
4506                 arglen = -1;
4507                 argidx = -2;
4508         }
4509         if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
4510             !PyObject_TypeCheck(args, &PyBaseString_Type))
4511                 dict = args;
4512         while (--fmtcnt >= 0) {
4513                 if (*fmt != '%') {
4514                         if (--rescnt < 0) {
4515                                 rescnt = fmtcnt + 100;
4516                                 reslen += rescnt;
4517                                 if (_PyString_Resize(&result, reslen) < 0)
4518                                         return NULL;
4519                                 res = PyString_AS_STRING(result)
4520                                         + reslen - rescnt;
4521                                 --rescnt;
4522                         }
4523                         *res++ = *fmt++;
4524                 }
4525                 else {
4526                         /* Got a format specifier */
4527                         int flags = 0;
4528                         Py_ssize_t width = -1;
4529                         int prec = -1;
4530                         int c = '\0';
4531                         int fill;
4532                         PyObject *v = NULL;
4533                         PyObject *temp = NULL;
4534                         char *pbuf;
4535                         int sign;
4536                         Py_ssize_t len;
4537                         char formatbuf[FORMATBUFLEN];
4538                              /* For format{float,int,char}() */
4539 #ifdef Py_USING_UNICODE
4540                         char *fmt_start = fmt;
4541                         Py_ssize_t argidx_start = argidx;
4542 #endif
4543
4544                         fmt++;
4545                         if (*fmt == '(') {
4546                                 char *keystart;
4547                                 Py_ssize_t keylen;
4548                                 PyObject *key;
4549                                 int pcount = 1;
4550
4551                                 if (dict == NULL) {
4552                                         PyErr_SetString(PyExc_TypeError,
4553                                                  "format requires a mapping");
4554                                         goto error;
4555                                 }
4556                                 ++fmt;
4557                                 --fmtcnt;
4558                                 keystart = fmt;
4559                                 /* Skip over balanced parentheses */
4560                                 while (pcount > 0 && --fmtcnt >= 0) {
4561                                         if (*fmt == ')')
4562                                                 --pcount;
4563                                         else if (*fmt == '(')
4564                                                 ++pcount;
4565                                         fmt++;
4566                                 }
4567                                 keylen = fmt - keystart - 1;
4568                                 if (fmtcnt < 0 || pcount > 0) {
4569                                         PyErr_SetString(PyExc_ValueError,
4570                                                    "incomplete format key");
4571                                         goto error;
4572                                 }
4573                                 key = PyString_FromStringAndSize(keystart,
4574                                                                  keylen);
4575                                 if (key == NULL)
4576                                         goto error;
4577                                 if (args_owned) {
4578                                         Py_DECREF(args);
4579                                         args_owned = 0;
4580                                 }
4581                                 args = PyObject_GetItem(dict, key);
4582                                 Py_DECREF(key);
4583                                 if (args == NULL) {
4584                                         goto error;
4585                                 }
4586                                 args_owned = 1;
4587                                 arglen = -1;
4588                                 argidx = -2;
4589                         }
4590                         while (--fmtcnt >= 0) {
4591                                 switch (c = *fmt++) {
4592                                 case '-': flags |= F_LJUST; continue;
4593                                 case '+': flags |= F_SIGN; continue;
4594                                 case ' ': flags |= F_BLANK; continue;
4595                                 case '#': flags |= F_ALT; continue;
4596                                 case '0': flags |= F_ZERO; continue;
4597                                 }
4598                                 break;
4599                         }
4600                         if (c == '*') {
4601                                 v = getnextarg(args, arglen, &argidx);
4602                                 if (v == NULL)
4603                                         goto error;
4604                                 if (!PyInt_Check(v)) {
4605                                         PyErr_SetString(PyExc_TypeError,
4606                                                         "* wants int");
4607                                         goto error;
4608                                 }
4609                                 width = PyInt_AsLong(v);
4610                                 if (width < 0) {
4611                                         flags |= F_LJUST;
4612                                         width = -width;
4613                                 }
4614                                 if (--fmtcnt >= 0)
4615                                         c = *fmt++;
4616                         }
4617                         else if (c >= 0 && isdigit(c)) {
4618                                 width = c - '0';
4619                                 while (--fmtcnt >= 0) {
4620                                         c = Py_CHARMASK(*fmt++);
4621                                         if (!isdigit(c))
4622                                                 break;
4623                                         if ((width*10) / 10 != width) {
4624                                                 PyErr_SetString(
4625                                                         PyExc_ValueError,
4626                                                         "width too big");
4627                                                 goto error;
4628                                         }
4629                                         width = width*10 + (c - '0');
4630                                 }
4631                         }
4632                         if (c == '.') {
4633                                 prec = 0;
4634                                 if (--fmtcnt >= 0)
4635                                         c = *fmt++;
4636                                 if (c == '*') {
4637                                         v = getnextarg(args, arglen, &argidx);
4638                                         if (v == NULL)
4639                                                 goto error;
4640                                         if (!PyInt_Check(v)) {
4641                                                 PyErr_SetString(
4642                                                         PyExc_TypeError,
4643                                                         "* wants int");
4644                                                 goto error;
4645                                         }
4646                                         prec = PyInt_AsLong(v);
4647                                         if (prec < 0)
4648                                                 prec = 0;
4649                                         if (--fmtcnt >= 0)
4650                                                 c = *fmt++;
4651                                 }
4652                                 else if (c >= 0 && isdigit(c)) {
4653                                         prec = c - '0';
4654                                         while (--fmtcnt >= 0) {
4655                                                 c = Py_CHARMASK(*fmt++);
4656                                                 if (!isdigit(c))
4657                                                         break;
4658                                                 if ((prec*10) / 10 != prec) {
4659                                                         PyErr_SetString(
4660                                                             PyExc_ValueError,
4661                                                             "prec too big");
4662                                                         goto error;
4663                                                 }
4664                                                 prec = prec*10 + (c - '0');
4665                                         }
4666                                 }
4667                         } /* prec */
4668                         if (fmtcnt >= 0) {
4669                                 if (c == 'h' || c == 'l' || c == 'L') {
4670                                         if (--fmtcnt >= 0)
4671                                                 c = *fmt++;
4672                                 }
4673                         }
4674                         if (fmtcnt < 0) {
4675                                 PyErr_SetString(PyExc_ValueError,
4676                                                 "incomplete format");
4677                                 goto error;
4678                         }
4679                         if (c != '%') {
4680                                 v = getnextarg(args, arglen, &argidx);
4681                                 if (v == NULL)
4682                                         goto error;
4683                         }
4684                         sign = 0;
4685                         fill = ' ';
4686                         switch (c) {
4687                         case '%':
4688                                 pbuf = "%";
4689                                 len = 1;
4690                                 break;
4691                         case 's':
4692 #ifdef Py_USING_UNICODE
4693                                 if (PyUnicode_Check(v)) {
4694                                         fmt = fmt_start;
4695                                         argidx = argidx_start;
4696                                         goto unicode;
4697                                 }
4698 #endif
4699                                 temp = _PyObject_Str(v);
4700 #ifdef Py_USING_UNICODE
4701                                 if (temp != NULL && PyUnicode_Check(temp)) {
4702                                         Py_DECREF(temp);
4703                                         fmt = fmt_start;
4704                                         argidx = argidx_start;
4705                                         goto unicode;
4706                                 }
4707 #endif
4708                                 /* Fall through */
4709                         case 'r':
4710                                 if (c == 'r')
4711                                         temp = PyObject_Repr(v);
4712                                 if (temp == NULL)
4713                                         goto error;
4714                                 if (!PyString_Check(temp)) {
4715                                         PyErr_SetString(PyExc_TypeError,
4716                                           "%s argument has non-string str()");
4717                                         Py_DECREF(temp);
4718                                         goto error;
4719                                 }
4720                                 pbuf = PyString_AS_STRING(temp);
4721                                 len = PyString_GET_SIZE(temp);
4722                                 if (prec >= 0 && len > prec)
4723                                         len = prec;
4724                                 break;
4725                         case 'i':
4726                         case 'd':
4727                         case 'u':
4728                         case 'o':
4729                         case 'x':
4730                         case 'X':
4731                                 if (c == 'i')
4732                                         c = 'd';
4733                                 if (PyLong_Check(v)) {
4734                                         int ilen;
4735                                         temp = _PyString_FormatLong(v, flags,
4736                                                 prec, c, &pbuf, &ilen);
4737                                         len = ilen;
4738                                         if (!temp)
4739                                                 goto error;
4740                                         sign = 1;
4741                                 }
4742                                 else {
4743                                         pbuf = formatbuf;
4744                                         len = formatint(pbuf,
4745                                                         sizeof(formatbuf),
4746                                                         flags, prec, c, v);
4747                                         if (len < 0)
4748                                                 goto error;
4749                                         sign = 1;
4750                                 }
4751                                 if (flags & F_ZERO)
4752                                         fill = '0';
4753                                 break;
4754                         case 'e':
4755                         case 'E':
4756                         case 'f':
4757                         case 'F':
4758                         case 'g':
4759                         case 'G':
4760                                 if (c == 'F')
4761                                         c = 'f';
4762                                 pbuf = formatbuf;
4763                                 len = formatfloat(pbuf, sizeof(formatbuf),
4764                                                   flags, prec, c, v);
4765                                 if (len < 0)
4766                                         goto error;
4767                                 sign = 1;
4768                                 if (flags & F_ZERO)
4769                                         fill = '0';
4770                                 break;
4771                         case 'c':
4772 #ifdef Py_USING_UNICODE
4773                                 if (PyUnicode_Check(v)) {
4774                                         fmt = fmt_start;
4775                                         argidx = argidx_start;
4776                                         goto unicode;
4777                                 }
4778 #endif
4779                                 pbuf = formatbuf;
4780                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4781                                 if (len < 0)
4782                                         goto error;
4783                                 break;
4784                         default:
4785                                 PyErr_Format(PyExc_ValueError,
4786                                   "unsupported format character '%c' (0x%x) "
4787                                   "at index %zd",
4788                                   c, c,
4789                                   (Py_ssize_t)(fmt - 1 -
4790                                                PyString_AsString(format)));
4791                                 goto error;
4792                         }
4793                         if (sign) {
4794                                 if (*pbuf == '-' || *pbuf == '+') {
4795                                         sign = *pbuf++;
4796                                         len--;
4797                                 }
4798                                 else if (flags & F_SIGN)
4799                                         sign = '+';
4800                                 else if (flags & F_BLANK)
4801                                         sign = ' ';
4802                                 else
4803                                         sign = 0;
4804                         }
4805                         if (width < len)
4806                                 width = len;
4807                         if (rescnt - (sign != 0) < width) {
4808                                 reslen -= rescnt;
4809                                 rescnt = width + fmtcnt + 100;
4810                                 reslen += rescnt;
4811                                 if (reslen < 0) {
4812                                         Py_DECREF(result);
4813                                         Py_XDECREF(temp);
4814                                         return PyErr_NoMemory();
4815                                 }
4816                                 if (_PyString_Resize(&result, reslen) < 0) {
4817                                         Py_XDECREF(temp);
4818                                         return NULL;
4819                                 }
4820                                 res = PyString_AS_STRING(result)
4821                                         + reslen - rescnt;
4822                         }
4823                         if (sign) {
4824                                 if (fill != ' ')
4825                                         *res++ = sign;
4826                                 rescnt--;
4827                                 if (width > len)
4828                                         width--;
4829                         }
4830                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4831                                 assert(pbuf[0] == '0');
4832                                 assert(pbuf[1] == c);
4833                                 if (fill != ' ') {
4834                                         *res++ = *pbuf++;
4835                                         *res++ = *pbuf++;
4836                                 }
4837                                 rescnt -= 2;
4838                                 width -= 2;
4839                                 if (width < 0)
4840                                         width = 0;
4841                                 len -= 2;
4842                         }
4843                         if (width > len && !(flags & F_LJUST)) {
4844                                 do {
4845                                         --rescnt;
4846                                         *res++ = fill;
4847                                 } while (--width > len);
4848                         }
4849                         if (fill == ' ') {
4850                                 if (sign)
4851                                         *res++ = sign;
4852                                 if ((flags & F_ALT) &&
4853                                     (c == 'x' || c == 'X')) {
4854                                         assert(pbuf[0] == '0');
4855                                         assert(pbuf[1] == c);
4856                                         *res++ = *pbuf++;
4857                                         *res++ = *pbuf++;
4858                                 }
4859                         }
4860                         Py_MEMCPY(res, pbuf, len);
4861                         res += len;
4862                         rescnt -= len;
4863                         while (--width >= len) {
4864                                 --rescnt;
4865                                 *res++ = ' ';
4866                         }
4867                         if (dict && (argidx < arglen) && c != '%') {
4868                                 PyErr_SetString(PyExc_TypeError,
4869                                            "not all arguments converted during string formatting");
4870                                 Py_XDECREF(temp);
4871                                 goto error;
4872                         }
4873                         Py_XDECREF(temp);
4874                 } /* '%' */
4875         } /* until end */
4876         if (argidx < arglen && !dict) {
4877                 PyErr_SetString(PyExc_TypeError,
4878                                 "not all arguments converted during string formatting");
4879                 goto error;
4880         }
4881         if (args_owned) {
4882                 Py_DECREF(args);
4883         }
4884         _PyString_Resize(&result, reslen - rescnt);
4885         return result;
4886
4887 #ifdef Py_USING_UNICODE
4888  unicode:
4889         if (args_owned) {
4890                 Py_DECREF(args);
4891                 args_owned = 0;
4892         }
4893         /* Fiddle args right (remove the first argidx arguments) */
4894         if (PyTuple_Check(orig_args) && argidx > 0) {
4895                 PyObject *v;
4896                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4897                 v = PyTuple_New(n);
4898                 if (v == NULL)
4899                         goto error;
4900                 while (--n >= 0) {
4901                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4902                         Py_INCREF(w);
4903                         PyTuple_SET_ITEM(v, n, w);
4904                 }
4905                 args = v;
4906         } else {
4907                 Py_INCREF(orig_args);
4908                 args = orig_args;
4909         }
4910         args_owned = 1;
4911         /* Take what we have of the result and let the Unicode formatting
4912            function format the rest of the input. */
4913         rescnt = res - PyString_AS_STRING(result);
4914         if (_PyString_Resize(&result, rescnt))
4915                 goto error;
4916         fmtcnt = PyString_GET_SIZE(format) - \
4917                  (fmt - PyString_AS_STRING(format));
4918         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4919         if (format == NULL)
4920                 goto error;
4921         v = PyUnicode_Format(format, args);
4922         Py_DECREF(format);
4923         if (v == NULL)
4924                 goto error;
4925         /* Paste what we have (result) to what the Unicode formatting
4926            function returned (v) and return the result (or error) */
4927         w = PyUnicode_Concat(result, v);
4928         Py_DECREF(result);
4929         Py_DECREF(v);
4930         Py_DECREF(args);
4931         return w;
4932 #endif /* Py_USING_UNICODE */
4933
4934  error:
4935         Py_DECREF(result);
4936         if (args_owned) {
4937                 Py_DECREF(args);
4938         }
4939         return NULL;
4940 }
4941
4942 void
4943 PyString_InternInPlace(PyObject **p)
4944 {
4945         register PyStringObject *s = (PyStringObject *)(*p);
4946         PyObject *t;
4947         if (s == NULL || !PyString_Check(s))
4948                 Py_FatalError("PyString_InternInPlace: strings only please!");
4949         /* If it's a string subclass, we don't really know what putting
4950            it in the interned dict might do. */
4951         if (!PyString_CheckExact(s))
4952                 return;
4953         if (PyString_CHECK_INTERNED(s))
4954                 return;
4955         if (interned == NULL) {
4956                 interned = PyDict_New();
4957                 if (interned == NULL) {
4958                         PyErr_Clear(); /* Don't leave an exception */
4959                         return;
4960                 }
4961         }
4962         t = PyDict_GetItem(interned, (PyObject *)s);
4963         if (t) {
4964                 Py_INCREF(t);
4965                 Py_DECREF(*p);
4966                 *p = t;
4967                 return;
4968         }
4969
4970         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4971                 PyErr_Clear();
4972                 return;
4973         }
4974         /* The two references in interned are not counted by refcnt.
4975            The string deallocator will take care of this */
4976         Py_Refcnt(s) -= 2;
4977         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4978 }
4979
4980 void
4981 PyString_InternImmortal(PyObject **p)
4982 {
4983         PyString_InternInPlace(p);
4984         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4985                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4986                 Py_INCREF(*p);
4987         }
4988 }
4989
4990
4991 PyObject *
4992 PyString_InternFromString(const char *cp)
4993 {
4994         PyObject *s = PyString_FromString(cp);
4995         if (s == NULL)
4996                 return NULL;
4997         PyString_InternInPlace(&s);
4998         return s;
4999 }
5000
5001 void
5002 PyString_Fini(void)
5003 {
5004         int i;
5005         for (i = 0; i < UCHAR_MAX + 1; i++) {
5006                 Py_XDECREF(characters[i]);
5007                 characters[i] = NULL;
5008         }
5009         Py_XDECREF(nullstring);
5010         nullstring = NULL;
5011 }
5012
5013 void _Py_ReleaseInternedStrings(void)
5014 {
5015         PyObject *keys;
5016         PyStringObject *s;
5017         Py_ssize_t i, n;
5018         Py_ssize_t immortal_size = 0, mortal_size = 0;
5019
5020         if (interned == NULL || !PyDict_Check(interned))
5021                 return;
5022         keys = PyDict_Keys(interned);
5023         if (keys == NULL || !PyList_Check(keys)) {
5024                 PyErr_Clear();
5025                 return;
5026         }
5027
5028         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5029            detector, interned strings are not forcibly deallocated; rather, we
5030            give them their stolen references back, and then clear and DECREF
5031            the interned dict. */
5032
5033         n = PyList_GET_SIZE(keys);
5034         fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5035                 n);
5036         for (i = 0; i < n; i++) {
5037                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5038                 switch (s->ob_sstate) {
5039                 case SSTATE_NOT_INTERNED:
5040                         /* XXX Shouldn't happen */
5041                         break;
5042                 case SSTATE_INTERNED_IMMORTAL:
5043                         Py_Refcnt(s) += 1;
5044                         immortal_size += Py_Size(s);
5045                         break;
5046                 case SSTATE_INTERNED_MORTAL:
5047                         Py_Refcnt(s) += 2;
5048                         mortal_size += Py_Size(s);
5049                         break;
5050                 default:
5051                         Py_FatalError("Inconsistent interned string state.");
5052                 }
5053                 s->ob_sstate = SSTATE_NOT_INTERNED;
5054         }
5055         fprintf(stderr, "total size of all interned strings: "
5056                         "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5057                         "mortal/immortal\n", mortal_size, immortal_size);
5058         Py_DECREF(keys);
5059         PyDict_Clear(interned);
5060         Py_DECREF(interned);
5061         interned = NULL;
5062 }