Objects/stringobject.c

   1 /* String (str/bytes) object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6 #include <ctype.h>
   7
   8 #ifdef COUNT_ALLOCS
   9 int null_strings, one_strings; /* cache-hit counters: shared empty string / shared one-character strings */
  10 #endif
  11
  12 static PyStringObject *characters[UCHAR_MAX + 1]; /* shared one-character strings, indexed by byte value; filled lazily by the constructors */
  13 static PyStringObject *nullstring; /* shared empty string; filled lazily by the constructors */
  14
  15 /* This dictionary holds all interned strings.  Note that references to
  16    strings in this dictionary are *not* counted in the string's ob_refcnt.
  17    When the interned string reaches a refcnt of 0 the string deallocation
  18    function will delete the reference from this dictionary.
  19
  20    Another way to look at this is to say that the actual reference
  21    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  22 */
  23 static PyObject *interned;
  24
  25 /*
  26    For both PyString_FromString() and PyString_FromStringAndSize(), the
  27    parameter `size' denotes number of characters to allocate, not counting any
  28    null terminating character.
  29
  30    For PyString_FromString(), the parameter `str' points to a null-terminated
  31    string containing exactly `size' bytes.
  32
  33    For PyString_FromStringAndSize(), the parameter `str' is
  34    either NULL or else points to a string containing at least `size' bytes.
  35    For PyString_FromStringAndSize(), the string in the `str' parameter does
  36    not have to be null-terminated.  (Therefore it is safe to construct a
  37    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  38    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  39    bytes (setting the last byte to the null terminating character) and you can
  40    fill in the data yourself.  If `str' is non-NULL then the resulting
  41    PyString object must be treated as immutable and you must not fill in nor
  42    alter the data yourself, since the strings may be shared.
  43
  44    The PyObject member `op->ob_size', which denotes the number of "extra
  45    items" in a variable-size object, will contain the number of bytes
  46    allocated for string data, not counting the null terminating character.  It
  47    is therefore equal to the `size' parameter (for
  48    PyString_FromStringAndSize()) or the length of the string in the `str'
  49    parameter (for PyString_FromString()).
  50 */
  51 PyObject *
  52 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  53 {
  54         register PyStringObject *op;
  55         if (size < 0) {
  56                 PyErr_SetString(PyExc_SystemError,
  57                     "Negative size passed to PyString_FromStringAndSize");
  58                 return NULL;
  59         }
  60         if (size == 0 && (op = nullstring) != NULL) {
  61 #ifdef COUNT_ALLOCS
  62                 null_strings++;
  63 #endif
  64                 Py_INCREF(op);
  65                 return (PyObject *)op;
  66         }
  67         if (size == 1 && str != NULL &&
  68             (op = characters[*str & UCHAR_MAX]) != NULL)
  69         {
  70 #ifdef COUNT_ALLOCS
  71                 one_strings++;
  72 #endif
  73                 Py_INCREF(op);
  74                 return (PyObject *)op;
  75         }
  76
  77         /* Inline PyObject_NewVar */
  78         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
  79         if (op == NULL)
  80                 return PyErr_NoMemory();
  81         PyObject_INIT_VAR(op, &PyString_Type, size);
  82         op->ob_shash = -1;
  83         op->ob_sstate = SSTATE_NOT_INTERNED;
  84         if (str != NULL)
  85                 Py_MEMCPY(op->ob_sval, str, size);
  86         op->ob_sval[size] = '\0';
  87         /* share short strings */
  88         if (size == 0) {
  89                 PyObject *t = (PyObject *)op;
  90                 PyString_InternInPlace(&t);
  91                 op = (PyStringObject *)t;
  92                 nullstring = op;
  93                 Py_INCREF(op);
  94         } else if (size == 1 && str != NULL) {
  95                 PyObject *t = (PyObject *)op;
  96                 PyString_InternInPlace(&t);
  97                 op = (PyStringObject *)t;
  98                 characters[*str & UCHAR_MAX] = op;
  99                 Py_INCREF(op);
 100         }
 101         return (PyObject *) op;
 102 }
 103
 104 PyObject *
 105 PyString_FromString(const char *str)
 106 {
 107         register size_t size;
 108         register PyStringObject *op;
 109
 110         assert(str != NULL);
 111         size = strlen(str);
 112         if (size > PY_SSIZE_T_MAX) {
 113                 PyErr_SetString(PyExc_OverflowError,
 114                         "string is too long for a Python string");
 115                 return NULL;
 116         }
 117         if (size == 0 && (op = nullstring) != NULL) {
 118 #ifdef COUNT_ALLOCS
 119                 null_strings++;
 120 #endif
 121                 Py_INCREF(op);
 122                 return (PyObject *)op;
 123         }
 124         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 125 #ifdef COUNT_ALLOCS
 126                 one_strings++;
 127 #endif
 128                 Py_INCREF(op);
 129                 return (PyObject *)op;
 130         }
 131
 132         /* Inline PyObject_NewVar */
 133         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 134         if (op == NULL)
 135                 return PyErr_NoMemory();
 136         PyObject_INIT_VAR(op, &PyString_Type, size);
 137         op->ob_shash = -1;
 138         op->ob_sstate = SSTATE_NOT_INTERNED;
 139         Py_MEMCPY(op->ob_sval, str, size+1);
 140         /* share short strings */
 141         if (size == 0) {
 142                 PyObject *t = (PyObject *)op;
 143                 PyString_InternInPlace(&t);
 144                 op = (PyStringObject *)t;
 145                 nullstring = op;
 146                 Py_INCREF(op);
 147         } else if (size == 1) {
 148                 PyObject *t = (PyObject *)op;
 149                 PyString_InternInPlace(&t);
 150                 op = (PyStringObject *)t;
 151                 characters[*str & UCHAR_MAX] = op;
 152                 Py_INCREF(op);
 153         }
 154         return (PyObject *) op;
 155 }
 156
 157 PyObject *
 158 PyString_FromFormatV(const char *format, va_list vargs)
 159 {
 160         va_list count;
 161         Py_ssize_t n = 0;
 162         const char* f;
 163         char *s;
 164         PyObject* string;
 165
 166 #ifdef VA_LIST_IS_ARRAY
 167         Py_MEMCPY(count, vargs, sizeof(va_list));
 168 #else
 169 #ifdef  __va_copy
 170         __va_copy(count, vargs);
 171 #else
 172         count = vargs;
 173 #endif
 174 #endif
 175         /* step 1: figure out how large a buffer we need */
 176         for (f = format; *f; f++) {
 177                 if (*f == '%') {
 178                         const char* p = f;
 179                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 180                                 ;
 181
 182                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 183                          * they don't affect the amount of space we reserve.
 184                          */
 185                         if ((*f == 'l' || *f == 'z') &&
 186                                         (f[1] == 'd' || f[1] == 'u'))
 187                                 ++f;
 188
 189                         switch (*f) {
 190                         case 'c':
 191                                 (void)va_arg(count, int);
 192                                 /* fall through... */
 193                         case '%':
 194                                 n++;
 195                                 break;
 196                         case 'd': case 'u': case 'i': case 'x':
 197                                 (void) va_arg(count, int);
 198                                 /* 20 bytes is enough to hold a 64-bit
 199                                    integer.  Decimal takes the most space.
 200                                    This isn't enough for octal. */
 201                                 n += 20;
 202                                 break;
 203                         case 's':
 204                                 s = va_arg(count, char*);
 205                                 n += strlen(s);
 206                                 break;
 207                         case 'p':
 208                                 (void) va_arg(count, int);
 209                                 /* maximum 64-bit pointer representation:
 210                                  * 0xffffffffffffffff
 211                                  * so 19 characters is enough.
 212                                  * XXX I count 18 -- what's the extra for?
 213                                  */
 214                                 n += 19;
 215                                 break;
 216                         default:
 217                                 /* if we stumble upon an unknown
 218                                    formatting code, copy the rest of
 219                                    the format string to the output
 220                                    string. (we cannot just skip the
 221                                    code, since there's no way to know
 222                                    what's in the argument list) */
 223                                 n += strlen(p);
 224                                 goto expand;
 225                         }
 226                 } else
 227                         n++;
 228         }
 229  expand:
 230         /* step 2: fill the buffer */
 231         /* Since we've analyzed how much space we need for the worst case,
 232            use sprintf directly instead of the slower PyOS_snprintf. */
 233         string = PyString_FromStringAndSize(NULL, n);
 234         if (!string)
 235                 return NULL;
 236
 237         s = PyString_AsString(string);
 238
 239         for (f = format; *f; f++) {
 240                 if (*f == '%') {
 241                         const char* p = f++;
 242                         Py_ssize_t i;
 243                         int longflag = 0;
 244                         int size_tflag = 0;
 245                         /* parse the width.precision part (we're only
 246                            interested in the precision value, if any) */
 247                         n = 0;
 248                         while (isdigit(Py_CHARMASK(*f)))
 249                                 n = (n*10) + *f++ - '0';
 250                         if (*f == '.') {
 251                                 f++;
 252                                 n = 0;
 253                                 while (isdigit(Py_CHARMASK(*f)))
 254                                         n = (n*10) + *f++ - '0';
 255                         }
 256                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 257                                 f++;
 258                         /* handle the long flag, but only for %ld and %lu.
 259                            others can be added when necessary. */
 260                         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
 261                                 longflag = 1;
 262                                 ++f;
 263                         }
 264                         /* handle the size_t flag. */
 265                         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 266                                 size_tflag = 1;
 267                                 ++f;
 268                         }
 269
 270                         switch (*f) {
 271                         case 'c':
 272                                 *s++ = va_arg(vargs, int);
 273                                 break;
 274                         case 'd':
 275                                 if (longflag)
 276                                         sprintf(s, "%ld", va_arg(vargs, long));
 277                                 else if (size_tflag)
 278                                         sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 279                                                 va_arg(vargs, Py_ssize_t));
 280                                 else
 281                                         sprintf(s, "%d", va_arg(vargs, int));
 282                                 s += strlen(s);
 283                                 break;
 284                         case 'u':
 285                                 if (longflag)
 286                                         sprintf(s, "%lu",
 287                                                 va_arg(vargs, unsigned long));
 288                                 else if (size_tflag)
 289                                         sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 290                                                 va_arg(vargs, size_t));
 291                                 else
 292                                         sprintf(s, "%u",
 293                                                 va_arg(vargs, unsigned int));
 294                                 s += strlen(s);
 295                                 break;
 296                         case 'i':
 297                                 sprintf(s, "%i", va_arg(vargs, int));
 298                                 s += strlen(s);
 299                                 break;
 300                         case 'x':
 301                                 sprintf(s, "%x", va_arg(vargs, int));
 302                                 s += strlen(s);
 303                                 break;
 304                         case 's':
 305                                 p = va_arg(vargs, char*);
 306                                 i = strlen(p);
 307                                 if (n > 0 && i > n)
 308                                         i = n;
 309                                 Py_MEMCPY(s, p, i);
 310                                 s += i;
 311                                 break;
 312                         case 'p':
 313                                 sprintf(s, "%p", va_arg(vargs, void*));
 314                                 /* %p is ill-defined:  ensure leading 0x. */
 315                                 if (s[1] == 'X')
 316                                         s[1] = 'x';
 317                                 else if (s[1] != 'x') {
 318                                         memmove(s+2, s, strlen(s)+1);
 319                                         s[0] = '0';
 320                                         s[1] = 'x';
 321                                 }
 322                                 s += strlen(s);
 323                                 break;
 324                         case '%':
 325                                 *s++ = '%';
 326                                 break;
 327                         default:
 328                                 strcpy(s, p);
 329                                 s += strlen(s);
 330                                 goto end;
 331                         }
 332                 } else
 333                         *s++ = *f;
 334         }
 335
 336  end:
 337         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 338         return string;
 339 }
 340
 341 PyObject *
 342 PyString_FromFormat(const char *format, ...)
 343 {
 344         PyObject* ret;
 345         va_list vargs;
 346
 347 #ifdef HAVE_STDARG_PROTOTYPES
 348         va_start(vargs, format);
 349 #else
 350         va_start(vargs);
 351 #endif
 352         ret = PyString_FromFormatV(format, vargs);
 353         va_end(vargs);
 354         return ret;
 355 }
 356
 357
 358 PyObject *PyString_Decode(const char *s,
 359                           Py_ssize_t size,
 360                           const char *encoding,
 361                           const char *errors)
 362 {
 363     PyObject *v, *str;
 364
 365     str = PyString_FromStringAndSize(s, size);
 366     if (str == NULL)
 367         return NULL;
 368     v = PyString_AsDecodedString(str, encoding, errors);
 369     Py_DECREF(str);
 370     return v;
 371 }
 372
 373 PyObject *PyString_AsDecodedObject(PyObject *str,
 374                                    const char *encoding,
 375                                    const char *errors)
 376 {
 377     PyObject *v;
 378
 379     if (!PyString_Check(str)) {
 380         PyErr_BadArgument();
 381         goto onError;
 382     }
 383
 384     if (encoding == NULL) {
 385 #ifdef Py_USING_UNICODE
 386         encoding = PyUnicode_GetDefaultEncoding();
 387 #else
 388         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 389         goto onError;
 390 #endif
 391     }
 392
 393     /* Decode via the codec registry */
 394     v = PyCodec_Decode(str, encoding, errors);
 395     if (v == NULL)
 396         goto onError;
 397
 398     return v;
 399
 400  onError:
 401     return NULL;
 402 }
 403
 404 PyObject *PyString_AsDecodedString(PyObject *str,
 405                                    const char *encoding,
 406                                    const char *errors)
 407 {
 408     PyObject *v;
 409
 410     v = PyString_AsDecodedObject(str, encoding, errors);
 411     if (v == NULL)
 412         goto onError;
 413
 414 #ifdef Py_USING_UNICODE
 415     /* Convert Unicode to a string using the default encoding */
 416     if (PyUnicode_Check(v)) {
 417         PyObject *temp = v;
 418         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 419         Py_DECREF(temp);
 420         if (v == NULL)
 421             goto onError;
 422     }
 423 #endif
 424     if (!PyString_Check(v)) {
 425         PyErr_Format(PyExc_TypeError,
 426                      "decoder did not return a string object (type=%.400s)",
 427                      Py_TYPE(v)->tp_name);
 428         Py_DECREF(v);
 429         goto onError;
 430     }
 431
 432     return v;
 433
 434  onError:
 435     return NULL;
 436 }
 437
 438 PyObject *PyString_Encode(const char *s,
 439                           Py_ssize_t size,
 440                           const char *encoding,
 441                           const char *errors)
 442 {
 443     PyObject *v, *str;
 444
 445     str = PyString_FromStringAndSize(s, size);
 446     if (str == NULL)
 447         return NULL;
 448     v = PyString_AsEncodedString(str, encoding, errors);
 449     Py_DECREF(str);
 450     return v;
 451 }
 452
 453 PyObject *PyString_AsEncodedObject(PyObject *str,
 454                                    const char *encoding,
 455                                    const char *errors)
 456 {
 457     PyObject *v;
 458
 459     if (!PyString_Check(str)) {
 460         PyErr_BadArgument();
 461         goto onError;
 462     }
 463
 464     if (encoding == NULL) {
 465 #ifdef Py_USING_UNICODE
 466         encoding = PyUnicode_GetDefaultEncoding();
 467 #else
 468         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 469         goto onError;
 470 #endif
 471     }
 472
 473     /* Encode via the codec registry */
 474     v = PyCodec_Encode(str, encoding, errors);
 475     if (v == NULL)
 476         goto onError;
 477
 478     return v;
 479
 480  onError:
 481     return NULL;
 482 }
 483
 484 PyObject *PyString_AsEncodedString(PyObject *str,
 485                                    const char *encoding,
 486                                    const char *errors)
 487 {
 488     PyObject *v;
 489
 490     v = PyString_AsEncodedObject(str, encoding, errors);
 491     if (v == NULL)
 492         goto onError;
 493
 494 #ifdef Py_USING_UNICODE
 495     /* Convert Unicode to a string using the default encoding */
 496     if (PyUnicode_Check(v)) {
 497         PyObject *temp = v;
 498         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 499         Py_DECREF(temp);
 500         if (v == NULL)
 501             goto onError;
 502     }
 503 #endif
 504     if (!PyString_Check(v)) {
 505         PyErr_Format(PyExc_TypeError,
 506                      "encoder did not return a string object (type=%.400s)",
 507                      Py_TYPE(v)->tp_name);
 508         Py_DECREF(v);
 509         goto onError;
 510     }
 511
 512     return v;
 513
 514  onError:
 515     return NULL;
 516 }
 517
 518 static void
 519 string_dealloc(PyObject *op)
 520 {
 521         switch (PyString_CHECK_INTERNED(op)) {
 522                 case SSTATE_NOT_INTERNED:
 523                         break;
 524
 525                 case SSTATE_INTERNED_MORTAL:
 526                         /* revive dead object temporarily for DelItem */
 527                         Py_REFCNT(op) = 3;
 528                         if (PyDict_DelItem(interned, op) != 0)
 529                                 Py_FatalError(
 530                                         "deletion of interned string failed");
 531                         break;
 532
 533                 case SSTATE_INTERNED_IMMORTAL:
 534                         Py_FatalError("Immortal interned string died.");
 535
 536                 default:
 537                         Py_FatalError("Inconsistent interned string state.");
 538         }
 539         Py_TYPE(op)->tp_free(op);
 540 }
 541
 542 /* Unescape a backslash-escaped string. If unicode is non-zero,
 543    the string is a u-literal. If recode_encoding is non-zero,
 544    the string is UTF-8 encoded and should be re-encoded in the
 545    specified encoding.  */
 546
 547 PyObject *PyString_DecodeEscape(const char *s,
 548                                 Py_ssize_t len,
 549                                 const char *errors,
 550                                 Py_ssize_t unicode,
 551                                 const char *recode_encoding)
 552 {
 553         int c;
 554         char *p, *buf;
 555         const char *end;
 556         PyObject *v;
 557         Py_ssize_t newlen = recode_encoding ? 4*len:len;
 558         v = PyString_FromStringAndSize((char *)NULL, newlen);
 559         if (v == NULL)
 560                 return NULL;
 561         p = buf = PyString_AsString(v);
 562         end = s + len;
 563         while (s < end) {
 564                 if (*s != '\\') {
 565                   non_esc:
 566 #ifdef Py_USING_UNICODE
 567                         if (recode_encoding && (*s & 0x80)) {
 568                                 PyObject *u, *w;
 569                                 char *r;
 570                                 const char* t;
 571                                 Py_ssize_t rn;
 572                                 t = s;
 573                                 /* Decode non-ASCII bytes as UTF-8. */
 574                                 while (t < end && (*t & 0x80)) t++;
 575                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 576                                 if(!u) goto failed;
 577
 578                                 /* Recode them in target encoding. */
 579                                 w = PyUnicode_AsEncodedString(
 580                                         u, recode_encoding, errors);
 581                                 Py_DECREF(u);
 582                                 if (!w) goto failed;
 583
 584                                 /* Append bytes to output buffer. */
 585                                 assert(PyString_Check(w));
 586                                 r = PyString_AS_STRING(w);
 587                                 rn = PyString_GET_SIZE(w);
 588                                 Py_MEMCPY(p, r, rn);
 589                                 p += rn;
 590                                 Py_DECREF(w);
 591                                 s = t;
 592                         } else {
 593                                 *p++ = *s++;
 594                         }
 595 #else
 596                         *p++ = *s++;
 597 #endif
 598                         continue;
 599                 }
 600                 s++;
 601                 if (s==end) {
 602                         PyErr_SetString(PyExc_ValueError,
 603                                         "Trailing \\ in string");
 604                         goto failed;
 605                 }
 606                 switch (*s++) {
 607                 /* XXX This assumes ASCII! */
 608                 case '\n': break;
 609                 case '\\': *p++ = '\\'; break;
 610                 case '\'': *p++ = '\''; break;
 611                 case '\"': *p++ = '\"'; break;
 612                 case 'b': *p++ = '\b'; break;
 613                 case 'f': *p++ = '\014'; break; /* FF */
 614                 case 't': *p++ = '\t'; break;
 615                 case 'n': *p++ = '\n'; break;
 616                 case 'r': *p++ = '\r'; break;
 617                 case 'v': *p++ = '\013'; break; /* VT */
 618                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 619                 case '0': case '1': case '2': case '3':
 620                 case '4': case '5': case '6': case '7':
 621                         c = s[-1] - '0';
 622                         if (s < end && '0' <= *s && *s <= '7') {
 623                                 c = (c<<3) + *s++ - '0';
 624                                 if (s < end && '0' <= *s && *s <= '7')
 625                                         c = (c<<3) + *s++ - '0';
 626                         }
 627                         *p++ = c;
 628                         break;
 629                 case 'x':
 630                         if (s+1 < end &&
 631                             isxdigit(Py_CHARMASK(s[0])) &&
 632                             isxdigit(Py_CHARMASK(s[1])))
 633                         {
 634                                 unsigned int x = 0;
 635                                 c = Py_CHARMASK(*s);
 636                                 s++;
 637                                 if (isdigit(c))
 638                                         x = c - '0';
 639                                 else if (islower(c))
 640                                         x = 10 + c - 'a';
 641                                 else
 642                                         x = 10 + c - 'A';
 643                                 x = x << 4;
 644                                 c = Py_CHARMASK(*s);
 645                                 s++;
 646                                 if (isdigit(c))
 647                                         x += c - '0';
 648                                 else if (islower(c))
 649                                         x += 10 + c - 'a';
 650                                 else
 651                                         x += 10 + c - 'A';
 652                                 *p++ = x;
 653                                 break;
 654                         }
 655                         if (!errors || strcmp(errors, "strict") == 0) {
 656                                 PyErr_SetString(PyExc_ValueError,
 657                                                 "invalid \\x escape");
 658                                 goto failed;
 659                         }
 660                         if (strcmp(errors, "replace") == 0) {
 661                                 *p++ = '?';
 662                         } else if (strcmp(errors, "ignore") == 0)
 663                                 /* do nothing */;
 664                         else {
 665                                 PyErr_Format(PyExc_ValueError,
 666                                              "decoding error; "
 667                                              "unknown error handling code: %.400s",
 668                                              errors);
 669                                 goto failed;
 670                         }
 671 #ifndef Py_USING_UNICODE
 672                 case 'u':
 673                 case 'U':
 674                 case 'N':
 675                         if (unicode) {
 676                                 PyErr_SetString(PyExc_ValueError,
 677                                           "Unicode escapes not legal "
 678                                           "when Unicode disabled");
 679                                 goto failed;
 680                         }
 681 #endif
 682                 default:
 683                         *p++ = '\\';
 684                         s--;
 685                         goto non_esc; /* an arbitry number of unescaped
 686                                          UTF-8 bytes may follow. */
 687                 }
 688         }
 689         if (p-buf < newlen)
 690                 _PyString_Resize(&v, p - buf);
 691         return v;
 692   failed:
 693         Py_DECREF(v);
 694         return NULL;
 695 }
 696
 697 /* -------------------------------------------------------------------- */
 698 /* object api */
 699
 700 static Py_ssize_t
 701 string_getsize(register PyObject *op)
 702 {
 703         char *s;
 704         Py_ssize_t len;
 705         if (PyString_AsStringAndSize(op, &s, &len))
 706                 return -1;
 707         return len;
 708 }
 709
 710 static /*const*/ char *
 711 string_getbuffer(register PyObject *op)
 712 {
 713         char *s;
 714         Py_ssize_t len;
 715         if (PyString_AsStringAndSize(op, &s, &len))
 716                 return NULL;
 717         return s;
 718 }
 719
 720 Py_ssize_t
 721 PyString_Size(register PyObject *op)
 722 {
 723         if (!PyString_Check(op))
 724                 return string_getsize(op);
 725         return Py_SIZE(op);
 726 }
 727
 728 /*const*/ char *
 729 PyString_AsString(register PyObject *op)
 730 {
 731         if (!PyString_Check(op))
 732                 return string_getbuffer(op);
 733         return ((PyStringObject *)op) -> ob_sval;
 734 }
 735
 736 int
 737 PyString_AsStringAndSize(register PyObject *obj,
 738                          register char **s,
 739                          register Py_ssize_t *len)
 740 {
 741         if (s == NULL) {
 742                 PyErr_BadInternalCall();
 743                 return -1;
 744         }
 745
 746         if (!PyString_Check(obj)) {
 747 #ifdef Py_USING_UNICODE
 748                 if (PyUnicode_Check(obj)) {
 749                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 750                         if (obj == NULL)
 751                                 return -1;
 752                 }
 753                 else
 754 #endif
 755                 {
 756                         PyErr_Format(PyExc_TypeError,
 757                                      "expected string or Unicode object, "
 758                                      "%.200s found", Py_TYPE(obj)->tp_name);
 759                         return -1;
 760                 }
 761         }
 762
 763         *s = PyString_AS_STRING(obj);
 764         if (len != NULL)
 765                 *len = PyString_GET_SIZE(obj);
 766         else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 767                 PyErr_SetString(PyExc_TypeError,
 768                                 "expected string without null bytes");
 769                 return -1;
 770         }
 771         return 0;
 772 }
 773
 774 /* -------------------------------------------------------------------- */
 775 /* Methods */
 776
 777 #include "stringlib/stringdefs.h"
 778 #include "stringlib/fastsearch.h"
 779
 780 #include "stringlib/count.h"
 781 #include "stringlib/find.h"
 782 #include "stringlib/partition.h"
 783
 784 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
 785 #include "stringlib/localeutil.h"
 786
 787
 788
 789 static int
 790 string_print(PyStringObject *op, FILE *fp, int flags)
 791 {
 792         Py_ssize_t i, str_len;
 793         char c;
 794         int quote;
 795
 796         /* XXX Ought to check for interrupts when writing long strings */
 797         if (! PyString_CheckExact(op)) {
 798                 int ret;
 799                 /* A str subclass may have its own __str__ method. */
 800                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 801                 if (op == NULL)
 802                         return -1;
 803                 ret = string_print(op, fp, flags);
 804                 Py_DECREF(op);
 805                 return ret;
 806         }
 807         if (flags & Py_PRINT_RAW) {
 808                 char *data = op->ob_sval;
 809                 Py_ssize_t size = Py_SIZE(op);
 810                 Py_BEGIN_ALLOW_THREADS
 811                 while (size > INT_MAX) {
 812                         /* Very long strings cannot be written atomically.
 813                          * But don't write exactly INT_MAX bytes at a time
 814                          * to avoid memory aligment issues.
 815                          */
 816                         const int chunk_size = INT_MAX & ~0x3FFF;
 817                         fwrite(data, 1, chunk_size, fp);
 818                         data += chunk_size;
 819                         size -= chunk_size;
 820                 }
 821 #ifdef __VMS
 822                 if (size) fwrite(data, (int)size, 1, fp);
 823 #else
 824                 fwrite(data, 1, (int)size, fp);
 825 #endif
 826                 Py_END_ALLOW_THREADS
 827                 return 0;
 828         }
 829
 830         /* figure out which quote to use; single is preferred */
 831         quote = '\'';
 832         if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 833             !memchr(op->ob_sval, '"', Py_SIZE(op)))
 834                 quote = '"';
 835
 836         str_len = Py_SIZE(op);
 837         Py_BEGIN_ALLOW_THREADS
 838         fputc(quote, fp);
 839         for (i = 0; i < str_len; i++) {
 840                 /* Since strings are immutable and the caller should have a
 841                 reference, accessing the interal buffer should not be an issue
 842                 with the GIL released. */
 843                 c = op->ob_sval[i];
 844                 if (c == quote || c == '\\')
 845                         fprintf(fp, "\\%c", c);
 846                 else if (c == '\t')
 847                         fprintf(fp, "\\t");
 848                 else if (c == '\n')
 849                         fprintf(fp, "\\n");
 850                 else if (c == '\r')
 851                         fprintf(fp, "\\r");
 852                 else if (c < ' ' || c >= 0x7f)
 853                         fprintf(fp, "\\x%02x", c & 0xff);
 854                 else
 855                         fputc(c, fp);
 856         }
 857         fputc(quote, fp);
 858         Py_END_ALLOW_THREADS
 859         return 0;
 860 }
 861
 862 PyObject *
 863 PyString_Repr(PyObject *obj, int smartquotes)
 864 {
 865         register PyStringObject* op = (PyStringObject*) obj;
 866         size_t newsize = 2 + 4 * Py_SIZE(op);
 867         PyObject *v;
 868         if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
 869                 PyErr_SetString(PyExc_OverflowError,
 870                         "string is too large to make repr");
 871                 return NULL;
 872         }
 873         v = PyString_FromStringAndSize((char *)NULL, newsize);
 874         if (v == NULL) {
 875                 return NULL;
 876         }
 877         else {
 878                 register Py_ssize_t i;
 879                 register char c;
 880                 register char *p;
 881                 int quote;
 882
 883                 /* figure out which quote to use; single is preferred */
 884                 quote = '\'';
 885                 if (smartquotes &&
 886                     memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 887                     !memchr(op->ob_sval, '"', Py_SIZE(op)))
 888                         quote = '"';
 889
 890                 p = PyString_AS_STRING(v);
 891                 *p++ = quote;
 892                 for (i = 0; i < Py_SIZE(op); i++) {
 893                         /* There's at least enough room for a hex escape
 894                            and a closing quote. */
 895                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 896                         c = op->ob_sval[i];
 897                         if (c == quote || c == '\\')
 898                                 *p++ = '\\', *p++ = c;
 899                         else if (c == '\t')
 900                                 *p++ = '\\', *p++ = 't';
 901                         else if (c == '\n')
 902                                 *p++ = '\\', *p++ = 'n';
 903                         else if (c == '\r')
 904                                 *p++ = '\\', *p++ = 'r';
 905                         else if (c < ' ' || c >= 0x7f) {
 906                                 /* For performance, we don't want to call
 907                                    PyOS_snprintf here (extra layers of
 908                                    function call). */
 909                                 sprintf(p, "\\x%02x", c & 0xff);
 910                                 p += 4;
 911                         }
 912                         else
 913                                 *p++ = c;
 914                 }
 915                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 916                 *p++ = quote;
 917                 *p = '\0';
 918                 _PyString_Resize(
 919                         &v, (p - PyString_AS_STRING(v)));
 920                 return v;
 921         }
 922 }
 923
 924 static PyObject *
 925 string_repr(PyObject *op)
 926 {
 927         return PyString_Repr(op, 1);
 928 }
 929
 930 static PyObject *
 931 string_str(PyObject *s)
 932 {
 933         assert(PyString_Check(s));
 934         if (PyString_CheckExact(s)) {
 935                 Py_INCREF(s);
 936                 return s;
 937         }
 938         else {
 939                 /* Subtype -- return genuine string with the same value. */
 940                 PyStringObject *t = (PyStringObject *) s;
 941                 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
 942         }
 943 }
 944
 945 static Py_ssize_t
 946 string_length(PyStringObject *a)
 947 {
 948         return Py_SIZE(a);
 949 }
 950
 951 static PyObject *
 952 string_concat(register PyStringObject *a, register PyObject *bb)
 953 {
 954         register Py_ssize_t size;
 955         register PyStringObject *op;
 956         if (!PyString_Check(bb)) {
 957 #ifdef Py_USING_UNICODE
 958                 if (PyUnicode_Check(bb))
 959                     return PyUnicode_Concat((PyObject *)a, bb);
 960 #endif
 961                 if (PyByteArray_Check(bb))
 962                     return PyByteArray_Concat((PyObject *)a, bb);
 963                 PyErr_Format(PyExc_TypeError,
 964                              "cannot concatenate 'str' and '%.200s' objects",
 965                              Py_TYPE(bb)->tp_name);
 966                 return NULL;
 967         }
 968 #define b ((PyStringObject *)bb)
 969         /* Optimize cases with empty left or right operand */
 970         if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
 971             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 972                 if (Py_SIZE(a) == 0) {
 973                         Py_INCREF(bb);
 974                         return bb;
 975                 }
 976                 Py_INCREF(a);
 977                 return (PyObject *)a;
 978         }
 979         size = Py_SIZE(a) + Py_SIZE(b);
 980         if (size < 0) {
 981                 PyErr_SetString(PyExc_OverflowError,
 982                                 "strings are too large to concat");
 983                 return NULL;
 984         }
 985
 986         /* Inline PyObject_NewVar */
 987         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
 988         if (op == NULL)
 989                 return PyErr_NoMemory();
 990         PyObject_INIT_VAR(op, &PyString_Type, size);
 991         op->ob_shash = -1;
 992         op->ob_sstate = SSTATE_NOT_INTERNED;
 993         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
 994         Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
 995         op->ob_sval[size] = '\0';
 996         return (PyObject *) op;
 997 #undef b
 998 }
 999
1000 static PyObject *
1001 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1002 {
1003         register Py_ssize_t i;
1004         register Py_ssize_t j;
1005         register Py_ssize_t size;
1006         register PyStringObject *op;
1007         size_t nbytes;
1008         if (n < 0)
1009                 n = 0;
1010         /* watch out for overflows:  the size can overflow int,
1011          * and the # of bytes needed can overflow size_t
1012          */
1013         size = Py_SIZE(a) * n;
1014         if (n && size / n != Py_SIZE(a)) {
1015                 PyErr_SetString(PyExc_OverflowError,
1016                         "repeated string is too long");
1017                 return NULL;
1018         }
1019         if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1020                 Py_INCREF(a);
1021                 return (PyObject *)a;
1022         }
1023         nbytes = (size_t)size;
1024         if (nbytes + sizeof(PyStringObject) <= nbytes) {
1025                 PyErr_SetString(PyExc_OverflowError,
1026                         "repeated string is too long");
1027                 return NULL;
1028         }
1029         op = (PyStringObject *)
1030                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1031         if (op == NULL)
1032                 return PyErr_NoMemory();
1033         PyObject_INIT_VAR(op, &PyString_Type, size);
1034         op->ob_shash = -1;
1035         op->ob_sstate = SSTATE_NOT_INTERNED;
1036         op->ob_sval[size] = '\0';
1037         if (Py_SIZE(a) == 1 && n > 0) {
1038                 memset(op->ob_sval, a->ob_sval[0] , n);
1039                 return (PyObject *) op;
1040         }
1041         i = 0;
1042         if (i < size) {
1043                 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1044                 i = Py_SIZE(a);
1045         }
1046         while (i < size) {
1047                 j = (i <= size-i)  ?  i  :  size-i;
1048                 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1049                 i += j;
1050         }
1051         return (PyObject *) op;
1052 }
1053
1054 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1055
1056 static PyObject *
1057 string_slice(register PyStringObject *a, register Py_ssize_t i,
1058              register Py_ssize_t j)
1059      /* j -- may be negative! */
1060 {
1061         if (i < 0)
1062                 i = 0;
1063         if (j < 0)
1064                 j = 0; /* Avoid signed/unsigned bug in next line */
1065         if (j > Py_SIZE(a))
1066                 j = Py_SIZE(a);
1067         if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1068                 /* It's the same as a */
1069                 Py_INCREF(a);
1070                 return (PyObject *)a;
1071         }
1072         if (j < i)
1073                 j = i;
1074         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1075 }
1076
1077 static int
1078 string_contains(PyObject *str_obj, PyObject *sub_obj)
1079 {
1080         if (!PyString_CheckExact(sub_obj)) {
1081 #ifdef Py_USING_UNICODE
1082                 if (PyUnicode_Check(sub_obj))
1083                         return PyUnicode_Contains(str_obj, sub_obj);
1084 #endif
1085                 if (!PyString_Check(sub_obj)) {
1086                         PyErr_Format(PyExc_TypeError,
1087                             "'in <string>' requires string as left operand, "
1088                             "not %.200s", Py_TYPE(sub_obj)->tp_name);
1089                         return -1;
1090                 }
1091         }
1092
1093         return stringlib_contains_obj(str_obj, sub_obj);
1094 }
1095
1096 static PyObject *
1097 string_item(PyStringObject *a, register Py_ssize_t i)
1098 {
1099         char pchar;
1100         PyObject *v;
1101         if (i < 0 || i >= Py_SIZE(a)) {
1102                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1103                 return NULL;
1104         }
1105         pchar = a->ob_sval[i];
1106         v = (PyObject *)characters[pchar & UCHAR_MAX];
1107         if (v == NULL)
1108                 v = PyString_FromStringAndSize(&pchar, 1);
1109         else {
1110 #ifdef COUNT_ALLOCS
1111                 one_strings++;
1112 #endif
1113                 Py_INCREF(v);
1114         }
1115         return v;
1116 }
1117
1118 static PyObject*
1119 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1120 {
1121         int c;
1122         Py_ssize_t len_a, len_b;
1123         Py_ssize_t min_len;
1124         PyObject *result;
1125
1126         /* Make sure both arguments are strings. */
1127         if (!(PyString_Check(a) && PyString_Check(b))) {
1128                 result = Py_NotImplemented;
1129                 goto out;
1130         }
1131         if (a == b) {
1132                 switch (op) {
1133                 case Py_EQ:case Py_LE:case Py_GE:
1134                         result = Py_True;
1135                         goto out;
1136                 case Py_NE:case Py_LT:case Py_GT:
1137                         result = Py_False;
1138                         goto out;
1139                 }
1140         }
1141         if (op == Py_EQ) {
1142                 /* Supporting Py_NE here as well does not save
1143                    much time, since Py_NE is rarely used.  */
1144                 if (Py_SIZE(a) == Py_SIZE(b)
1145                     && (a->ob_sval[0] == b->ob_sval[0]
1146                         && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1147                         result = Py_True;
1148                 } else {
1149                         result = Py_False;
1150                 }
1151                 goto out;
1152         }
1153         len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1154         min_len = (len_a < len_b) ? len_a : len_b;
1155         if (min_len > 0) {
1156                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1157                 if (c==0)
1158                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1159         } else
1160                 c = 0;
1161         if (c == 0)
1162                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1163         switch (op) {
1164         case Py_LT: c = c <  0; break;
1165         case Py_LE: c = c <= 0; break;
1166         case Py_EQ: assert(0);  break; /* unreachable */
1167         case Py_NE: c = c != 0; break;
1168         case Py_GT: c = c >  0; break;
1169         case Py_GE: c = c >= 0; break;
1170         default:
1171                 result = Py_NotImplemented;
1172                 goto out;
1173         }
1174         result = c ? Py_True : Py_False;
1175   out:
1176         Py_INCREF(result);
1177         return result;
1178 }
1179
1180 int
1181 _PyString_Eq(PyObject *o1, PyObject *o2)
1182 {
1183         PyStringObject *a = (PyStringObject*) o1;
1184         PyStringObject *b = (PyStringObject*) o2;
1185         return Py_SIZE(a) == Py_SIZE(b)
1186           && *a->ob_sval == *b->ob_sval
1187           && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1188 }
1189
1190 static long
1191 string_hash(PyStringObject *a)
1192 {
1193         register Py_ssize_t len;
1194         register unsigned char *p;
1195         register long x;
1196
1197         if (a->ob_shash != -1)
1198                 return a->ob_shash;
1199         len = Py_SIZE(a);
1200         p = (unsigned char *) a->ob_sval;
1201         x = *p << 7;
1202         while (--len >= 0)
1203                 x = (1000003*x) ^ *p++;
1204         x ^= Py_SIZE(a);
1205         if (x == -1)
1206                 x = -2;
1207         a->ob_shash = x;
1208         return x;
1209 }
1210
1211 static PyObject*
1212 string_subscript(PyStringObject* self, PyObject* item)
1213 {
1214         if (PyIndex_Check(item)) {
1215                 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1216                 if (i == -1 && PyErr_Occurred())
1217                         return NULL;
1218                 if (i < 0)
1219                         i += PyString_GET_SIZE(self);
1220                 return string_item(self, i);
1221         }
1222         else if (PySlice_Check(item)) {
1223                 Py_ssize_t start, stop, step, slicelength, cur, i;
1224                 char* source_buf;
1225                 char* result_buf;
1226                 PyObject* result;
1227
1228                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1229                                  PyString_GET_SIZE(self),
1230                                  &start, &stop, &step, &slicelength) < 0) {
1231                         return NULL;
1232                 }
1233
1234                 if (slicelength <= 0) {
1235                         return PyString_FromStringAndSize("", 0);
1236                 }
1237                 else if (start == 0 && step == 1 &&
1238                          slicelength == PyString_GET_SIZE(self) &&
1239                          PyString_CheckExact(self)) {
1240                         Py_INCREF(self);
1241                         return (PyObject *)self;
1242                 }
1243                 else if (step == 1) {
1244                         return PyString_FromStringAndSize(
1245                                 PyString_AS_STRING(self) + start,
1246                                 slicelength);
1247                 }
1248                 else {
1249                         source_buf = PyString_AsString((PyObject*)self);
1250                         result_buf = (char *)PyMem_Malloc(slicelength);
1251                         if (result_buf == NULL)
1252                                 return PyErr_NoMemory();
1253
1254                         for (cur = start, i = 0; i < slicelength;
1255                              cur += step, i++) {
1256                                 result_buf[i] = source_buf[cur];
1257                         }
1258
1259                         result = PyString_FromStringAndSize(result_buf,
1260                                                             slicelength);
1261                         PyMem_Free(result_buf);
1262                         return result;
1263                 }
1264         }
1265         else {
1266                 PyErr_Format(PyExc_TypeError,
1267                              "string indices must be integers, not %.200s",
1268                              Py_TYPE(item)->tp_name);
1269                 return NULL;
1270         }
1271 }
1272
1273 static Py_ssize_t
1274 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1275 {
1276         if ( index != 0 ) {
1277                 PyErr_SetString(PyExc_SystemError,
1278                                 "accessing non-existent string segment");
1279                 return -1;
1280         }
1281         *ptr = (void *)self->ob_sval;
1282         return Py_SIZE(self);
1283 }
1284
1285 static Py_ssize_t
1286 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1287 {
1288         PyErr_SetString(PyExc_TypeError,
1289                         "Cannot use string as modifiable buffer");
1290         return -1;
1291 }
1292
1293 static Py_ssize_t
1294 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1295 {
1296         if ( lenp )
1297                 *lenp = Py_SIZE(self);
1298         return 1;
1299 }
1300
1301 static Py_ssize_t
1302 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1303 {
1304         if ( index != 0 ) {
1305                 PyErr_SetString(PyExc_SystemError,
1306                                 "accessing non-existent string segment");
1307                 return -1;
1308         }
1309         *ptr = self->ob_sval;
1310         return Py_SIZE(self);
1311 }
1312
1313 static int
1314 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1315 {
1316         return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_SIZE(self),
1317                                  0, flags);
1318 }
1319
1320 static PySequenceMethods string_as_sequence = {
1321         (lenfunc)string_length, /*sq_length*/
1322         (binaryfunc)string_concat, /*sq_concat*/
1323         (ssizeargfunc)string_repeat, /*sq_repeat*/
1324         (ssizeargfunc)string_item, /*sq_item*/
1325         (ssizessizeargfunc)string_slice, /*sq_slice*/
1326         0,              /*sq_ass_item*/
1327         0,              /*sq_ass_slice*/
1328         (objobjproc)string_contains /*sq_contains*/
1329 };
1330
1331 static PyMappingMethods string_as_mapping = {
1332         (lenfunc)string_length,
1333         (binaryfunc)string_subscript,
1334         0,
1335 };
1336
1337 static PyBufferProcs string_as_buffer = {
1338         (readbufferproc)string_buffer_getreadbuf,
1339         (writebufferproc)string_buffer_getwritebuf,
1340         (segcountproc)string_buffer_getsegcount,
1341         (charbufferproc)string_buffer_getcharbuf,
1342         (getbufferproc)string_buffer_getbuffer,
1343         0, /* XXX */
1344 };
1345
1346
1347 \f
/* Selectors for the strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string with its three-character
   "|O:" prefix skipped.  The argument is parenthesized so that any
   expression may be passed. */
#define STRIPNAME(i) (stripformat[(i)]+3)
1356
1357
/* True iff the `length' bytes at target+offset equal `pattern'.  The first
   and last bytes are compared directly and the bytes in between with
   memcmp().
   Don't call if length < 2.
   Every argument is parenthesized so that expressions such as `n - 1' or
   `base + k' expand correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1363
1364
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.
   The argument is parenthesized so that an expression can be passed
   safely; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
        ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1377
/* The SPLIT_* helpers below expand inside a function that declares
   `PyObject *str', `PyObject *list' and an `onError:' label (and, for
   SPLIT_ADD / FIX_PREALLOC_SIZE, a `count' variable). */

/* Create the substring data[left:right] and append it to `list', jumping
   to onError on failure.  The body is wrapped in braces so that it expands
   to a single statement and stays correct after an unbraced `if'. */
#define SPLIT_APPEND(data, left, right) {                       \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (PyList_Append(list, str)) {                         \
                Py_DECREF(str);                                 \
                goto onError;                                   \
        }                                                       \
        else                                                    \
                Py_DECREF(str);                                 \
        }

/* Like SPLIT_APPEND, but while count < MAX_PREALLOC the new string is
   stored straight into slot `count' of the preallocated list (which takes
   over the reference); later items are appended.  Increments `count'. */
#define SPLIT_ADD(data, left, right) {                          \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
                goto onError;                                   \
        if (count < MAX_PREALLOC) {                             \
                PyList_SET_ITEM(list, count, str);              \
        } else {                                                \
                if (PyList_Append(list, str)) {                 \
                        Py_DECREF(str);                         \
                        goto onError;                           \
                }                                               \
                else                                            \
                        Py_DECREF(str);                         \
        }                                                       \
        count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1409
/* Advance (SKIP_*) or retreat (RSKIP_*) the index `i' over a run of
   whitespace / non-whitespace bytes in `s', stopping at `len' or below 0
   respectively.  `i' must be a modifiable lvalue.  Arguments are
   parenthesized so that expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0  &&  isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0  && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1414
1415 Py_LOCAL_INLINE(PyObject *)
1416 split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1417 {
1418         const char *s = PyString_AS_STRING(self);
1419         Py_ssize_t i, j, count=0;
1420         PyObject *str;
1421         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1422
1423         if (list == NULL)
1424                 return NULL;
1425
1426         i = j = 0;
1427
1428         while (maxsplit-- > 0) {
1429                 SKIP_SPACE(s, i, len);
1430                 if (i==len) break;
1431                 j = i; i++;
1432                 SKIP_NONSPACE(s, i, len);
1433                 if (j == 0 && i == len && PyString_CheckExact(self)) {
1434                         /* No whitespace in self, so just use it as list[0] */
1435                         Py_INCREF(self);
1436                         PyList_SET_ITEM(list, 0, (PyObject *)self);
1437                         count++;
1438                         break;
1439                 }
1440                 SPLIT_ADD(s, j, i);
1441         }
1442
1443         if (i < len) {
1444                 /* Only occurs when maxsplit was reached */
1445                 /* Skip any remaining whitespace and copy to end of string */
1446                 SKIP_SPACE(s, i, len);
1447                 if (i != len)
1448                         SPLIT_ADD(s, i, len);
1449         }
1450         FIX_PREALLOC_SIZE(list);
1451         return list;
1452   onError:
1453         Py_DECREF(list);
1454         return NULL;
1455 }
1456
1457 Py_LOCAL_INLINE(PyObject *)
1458 split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1459 {
1460         const char *s = PyString_AS_STRING(self);
1461         register Py_ssize_t i, j, count=0;
1462         PyObject *str;
1463         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1464
1465         if (list == NULL)
1466                 return NULL;
1467
1468         i = j = 0;
1469         while ((j < len) && (maxcount-- > 0)) {
1470                 for(; j<len; j++) {
1471                         /* I found that using memchr makes no difference */
1472                         if (s[j] == ch) {
1473                                 SPLIT_ADD(s, i, j);
1474                                 i = j = j + 1;
1475                                 break;
1476                         }
1477                 }
1478         }
1479         if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1480                 /* ch not in self, so just use self as list[0] */
1481                 Py_INCREF(self);
1482                 PyList_SET_ITEM(list, 0, (PyObject *)self);
1483                 count++;
1484         }
1485         else if (i <= len) {
1486                 SPLIT_ADD(s, i, len);
1487         }
1488         FIX_PREALLOC_SIZE(list);
1489         return list;
1490
1491   onError:
1492         Py_DECREF(list);
1493         return NULL;
1494 }
1495
1496 PyDoc_STRVAR(split__doc__,
1497 "S.split([sep [,maxsplit]]) -> list of strings\n\
1498 \n\
1499 Return a list of the words in the string S, using sep as the\n\
1500 delimiter string.  If maxsplit is given, at most maxsplit\n\
1501 splits are done. If sep is not specified or is None, any\n\
1502 whitespace string is a separator and empty strings are removed\n\
1503 from the result.");
1504
1505 static PyObject *
1506 string_split(PyStringObject *self, PyObject *args)
1507 {
1508         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1509         Py_ssize_t maxsplit = -1, count=0;
1510         const char *s = PyString_AS_STRING(self), *sub;
1511         PyObject *list, *str, *subobj = Py_None;
1512 #ifdef USE_FAST
1513         Py_ssize_t pos;
1514 #endif
1515
1516         if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1517                 return NULL;
1518         if (maxsplit < 0)
1519                 maxsplit = PY_SSIZE_T_MAX;
1520         if (subobj == Py_None)
1521                 return split_whitespace(self, len, maxsplit);
1522         if (PyString_Check(subobj)) {
1523                 sub = PyString_AS_STRING(subobj);
1524                 n = PyString_GET_SIZE(subobj);
1525         }
1526 #ifdef Py_USING_UNICODE
1527         else if (PyUnicode_Check(subobj))
1528                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1529 #endif
1530         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1531                 return NULL;
1532
1533         if (n == 0) {
1534                 PyErr_SetString(PyExc_ValueError, "empty separator");
1535                 return NULL;
1536         }
1537         else if (n == 1)
1538                 return split_char(self, len, sub[0], maxsplit);
1539
1540         list = PyList_New(PREALLOC_SIZE(maxsplit));
1541         if (list == NULL)
1542                 return NULL;
1543
1544 #ifdef USE_FAST
1545         i = j = 0;
1546         while (maxsplit-- > 0) {
1547                 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1548                 if (pos < 0)
1549                         break;
1550                 j = i+pos;
1551                 SPLIT_ADD(s, i, j);
1552                 i = j + n;
1553         }
1554 #else
1555         i = j = 0;
1556         while ((j+n <= len) && (maxsplit-- > 0)) {
1557                 for (; j+n <= len; j++) {
1558                         if (Py_STRING_MATCH(s, j, sub, n)) {
1559                                 SPLIT_ADD(s, i, j);
1560                                 i = j = j + n;
1561                                 break;
1562                         }
1563                 }
1564         }
1565 #endif
1566         SPLIT_ADD(s, i, len);
1567         FIX_PREALLOC_SIZE(list);
1568         return list;
1569
1570  onError:
1571         Py_DECREF(list);
1572         return NULL;
1573 }
1574
1575 PyDoc_STRVAR(partition__doc__,
1576 "S.partition(sep) -> (head, sep, tail)\n\
1577 \n\
1578 Searches for the separator sep in S, and returns the part before it,\n\
1579 the separator itself, and the part after it.  If the separator is not\n\
1580 found, returns S and two empty strings.");
1581
1582 static PyObject *
1583 string_partition(PyStringObject *self, PyObject *sep_obj)
1584 {
1585         const char *sep;
1586         Py_ssize_t sep_len;
1587
1588         if (PyString_Check(sep_obj)) {
1589                 sep = PyString_AS_STRING(sep_obj);
1590                 sep_len = PyString_GET_SIZE(sep_obj);
1591         }
1592 #ifdef Py_USING_UNICODE
1593         else if (PyUnicode_Check(sep_obj))
1594                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1595 #endif
1596         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1597                 return NULL;
1598
1599         return stringlib_partition(
1600                 (PyObject*) self,
1601                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1602                 sep_obj, sep, sep_len
1603                 );
1604 }
1605
1606 PyDoc_STRVAR(rpartition__doc__,
1607 "S.rpartition(sep) -> (tail, sep, head)\n\
1608 \n\
1609 Searches for the separator sep in S, starting at the end of S, and returns\n\
1610 the part before it, the separator itself, and the part after it.  If the\n\
1611 separator is not found, returns two empty strings and S.");
1612
1613 static PyObject *
1614 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1615 {
1616         const char *sep;
1617         Py_ssize_t sep_len;
1618
1619         if (PyString_Check(sep_obj)) {
1620                 sep = PyString_AS_STRING(sep_obj);
1621                 sep_len = PyString_GET_SIZE(sep_obj);
1622         }
1623 #ifdef Py_USING_UNICODE
1624         else if (PyUnicode_Check(sep_obj))
1625                 return PyUnicode_Partition((PyObject *) self, sep_obj);
1626 #endif
1627         else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1628                 return NULL;
1629
1630         return stringlib_rpartition(
1631                 (PyObject*) self,
1632                 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1633                 sep_obj, sep, sep_len
1634                 );
1635 }
1636
1637 Py_LOCAL_INLINE(PyObject *)
1638 rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
1639 {
1640         const char *s = PyString_AS_STRING(self);
1641         Py_ssize_t i, j, count=0;
1642         PyObject *str;
1643         PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1644
1645         if (list == NULL)
1646                 return NULL;
1647
1648         i = j = len-1;
1649
1650         while (maxsplit-- > 0) {
1651                 RSKIP_SPACE(s, i);
1652                 if (i<0) break;
1653                 j = i; i--;
1654                 RSKIP_NONSPACE(s, i);
1655                 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1656                         /* No whitespace in self, so just use it as list[0] */
1657                         Py_INCREF(self);
1658                         PyList_SET_ITEM(list, 0, (PyObject *)self);
1659                         count++;
1660                         break;
1661                 }
1662                 SPLIT_ADD(s, i + 1, j + 1);
1663         }
1664         if (i >= 0) {
1665                 /* Only occurs when maxsplit was reached */
1666                 /* Skip any remaining whitespace and copy to beginning of string */
1667                 RSKIP_SPACE(s, i);
1668                 if (i >= 0)
1669                         SPLIT_ADD(s, 0, i + 1);
1670
1671         }
1672         FIX_PREALLOC_SIZE(list);
1673         if (PyList_Reverse(list) < 0)
1674                 goto onError;
1675         return list;
1676   onError:
1677         Py_DECREF(list);
1678         return NULL;
1679 }
1680
1681 Py_LOCAL_INLINE(PyObject *)
1682 rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1683 {
1684         const char *s = PyString_AS_STRING(self);
1685         register Py_ssize_t i, j, count=0;
1686         PyObject *str;
1687         PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1688
1689         if (list == NULL)
1690                 return NULL;
1691
1692         i = j = len - 1;
1693         while ((i >= 0) && (maxcount-- > 0)) {
1694                 for (; i >= 0; i--) {
1695                         if (s[i] == ch) {
1696                                 SPLIT_ADD(s, i + 1, j + 1);
1697                                 j = i = i - 1;
1698                                 break;
1699                         }
1700                 }
1701         }
1702         if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1703                 /* ch not in self, so just use self as list[0] */
1704                 Py_INCREF(self);
1705                 PyList_SET_ITEM(list, 0, (PyObject *)self);
1706                 count++;
1707         }
1708         else if (j >= -1) {
1709                 SPLIT_ADD(s, 0, j + 1);
1710         }
1711         FIX_PREALLOC_SIZE(list);
1712         if (PyList_Reverse(list) < 0)
1713                 goto onError;
1714         return list;
1715
1716  onError:
1717         Py_DECREF(list);
1718         return NULL;
1719 }
1720
1721 PyDoc_STRVAR(rsplit__doc__,
1722 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1723 \n\
1724 Return a list of the words in the string S, using sep as the\n\
1725 delimiter string, starting at the end of the string and working\n\
1726 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1727 done. If sep is not specified or is None, any whitespace string\n\
1728 is a separator.");
1729
1730 static PyObject *
1731 string_rsplit(PyStringObject *self, PyObject *args)
1732 {
1733         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1734         Py_ssize_t maxsplit = -1, count=0;
1735         const char *s, *sub;
1736         PyObject *list, *str, *subobj = Py_None;
1737
1738         if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1739                 return NULL;
1740         if (maxsplit < 0)
1741                 maxsplit = PY_SSIZE_T_MAX;
1742         if (subobj == Py_None)
1743                 return rsplit_whitespace(self, len, maxsplit);
1744         if (PyString_Check(subobj)) {
1745                 sub = PyString_AS_STRING(subobj);
1746                 n = PyString_GET_SIZE(subobj);
1747         }
1748 #ifdef Py_USING_UNICODE
1749         else if (PyUnicode_Check(subobj))
1750                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1751 #endif
1752         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1753                 return NULL;
1754
1755         if (n == 0) {
1756                 PyErr_SetString(PyExc_ValueError, "empty separator");
1757                 return NULL;
1758         }
1759         else if (n == 1)
1760                 return rsplit_char(self, len, sub[0], maxsplit);
1761
1762         list = PyList_New(PREALLOC_SIZE(maxsplit));
1763         if (list == NULL)
1764                 return NULL;
1765
1766         j = len;
1767         i = j - n;
1768
1769         s = PyString_AS_STRING(self);
1770         while ( (i >= 0) && (maxsplit-- > 0) ) {
1771                 for (; i>=0; i--) {
1772                         if (Py_STRING_MATCH(s, i, sub, n)) {
1773                                 SPLIT_ADD(s, i + n, j);
1774                                 j = i;
1775                                 i -= n;
1776                                 break;
1777                         }
1778                 }
1779         }
1780         SPLIT_ADD(s, 0, j);
1781         FIX_PREALLOC_SIZE(list);
1782         if (PyList_Reverse(list) < 0)
1783                 goto onError;
1784         return list;
1785
1786 onError:
1787         Py_DECREF(list);
1788         return NULL;
1789 }
1790
1791
1792 PyDoc_STRVAR(join__doc__,
1793 "S.join(sequence) -> string\n\
1794 \n\
1795 Return a string which is the concatenation of the strings in the\n\
1796 sequence.  The separator between elements is S.");
1797
1798 static PyObject *
1799 string_join(PyStringObject *self, PyObject *orig)
1800 {
1801         char *sep = PyString_AS_STRING(self);
1802         const Py_ssize_t seplen = PyString_GET_SIZE(self);
1803         PyObject *res = NULL;
1804         char *p;
1805         Py_ssize_t seqlen = 0;
1806         size_t sz = 0;
1807         Py_ssize_t i;
1808         PyObject *seq, *item;
1809
1810         seq = PySequence_Fast(orig, "");
1811         if (seq == NULL) {
1812                 return NULL;
1813         }
1814
1815         seqlen = PySequence_Size(seq);
1816         if (seqlen == 0) {
1817                 Py_DECREF(seq);
1818                 return PyString_FromString("");
1819         }
1820         if (seqlen == 1) {
1821                 item = PySequence_Fast_GET_ITEM(seq, 0);
1822                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1823                         Py_INCREF(item);
1824                         Py_DECREF(seq);
1825                         return item;
1826                 }
1827         }
1828
1829         /* There are at least two things to join, or else we have a subclass
1830          * of the builtin types in the sequence.
1831          * Do a pre-pass to figure out the total amount of space we'll
1832          * need (sz), see whether any argument is absurd, and defer to
1833          * the Unicode join if appropriate.
1834          */
1835         for (i = 0; i < seqlen; i++) {
1836                 const size_t old_sz = sz;
1837                 item = PySequence_Fast_GET_ITEM(seq, i);
1838                 if (!PyString_Check(item)){
1839 #ifdef Py_USING_UNICODE
1840                         if (PyUnicode_Check(item)) {
1841                                 /* Defer to Unicode join.
1842                                  * CAUTION:  There's no gurantee that the
1843                                  * original sequence can be iterated over
1844                                  * again, so we must pass seq here.
1845                                  */
1846                                 PyObject *result;
1847                                 result = PyUnicode_Join((PyObject *)self, seq);
1848                                 Py_DECREF(seq);
1849                                 return result;
1850                         }
1851 #endif
1852                         PyErr_Format(PyExc_TypeError,
1853                                      "sequence item %zd: expected string,"
1854                                      " %.80s found",
1855                                      i, Py_TYPE(item)->tp_name);
1856                         Py_DECREF(seq);
1857                         return NULL;
1858                 }
1859                 sz += PyString_GET_SIZE(item);
1860                 if (i != 0)
1861                         sz += seplen;
1862                 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1863                         PyErr_SetString(PyExc_OverflowError,
1864                                 "join() result is too long for a Python string");
1865                         Py_DECREF(seq);
1866                         return NULL;
1867                 }
1868         }
1869
1870         /* Allocate result space. */
1871         res = PyString_FromStringAndSize((char*)NULL, sz);
1872         if (res == NULL) {
1873                 Py_DECREF(seq);
1874                 return NULL;
1875         }
1876
1877         /* Catenate everything. */
1878         p = PyString_AS_STRING(res);
1879         for (i = 0; i < seqlen; ++i) {
1880                 size_t n;
1881                 item = PySequence_Fast_GET_ITEM(seq, i);
1882                 n = PyString_GET_SIZE(item);
1883                 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1884                 p += n;
1885                 if (i < seqlen - 1) {
1886                         Py_MEMCPY(p, sep, seplen);
1887                         p += seplen;
1888                 }
1889         }
1890
1891         Py_DECREF(seq);
1892         return res;
1893 }
1894
1895 PyObject *
1896 _PyString_Join(PyObject *sep, PyObject *x)
1897 {
1898         assert(sep != NULL && PyString_Check(sep));
1899         assert(x != NULL);
1900         return string_join((PyStringObject *)sep, x);
1901 }
1902
1903 Py_LOCAL_INLINE(void)
1904 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1905 {
1906         if (*end > len)
1907                 *end = len;
1908         else if (*end < 0)
1909                 *end += len;
1910         if (*end < 0)
1911                 *end = 0;
1912         if (*start < 0)
1913                 *start += len;
1914         if (*start < 0)
1915                 *start = 0;
1916 }
1917
1918 Py_LOCAL_INLINE(Py_ssize_t)
1919 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1920 {
1921         PyObject *subobj;
1922         const char *sub;
1923         Py_ssize_t sub_len;
1924         Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1925         PyObject *obj_start=Py_None, *obj_end=Py_None;
1926
1927         if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1928                 &obj_start, &obj_end))
1929                 return -2;
1930         /* To support None in "start" and "end" arguments, meaning
1931            the same as if they were not passed.
1932         */
1933         if (obj_start != Py_None)
1934                 if (!_PyEval_SliceIndex(obj_start, &start))
1935                 return -2;
1936         if (obj_end != Py_None)
1937                 if (!_PyEval_SliceIndex(obj_end, &end))
1938                 return -2;
1939
1940         if (PyString_Check(subobj)) {
1941                 sub = PyString_AS_STRING(subobj);
1942                 sub_len = PyString_GET_SIZE(subobj);
1943         }
1944 #ifdef Py_USING_UNICODE
1945         else if (PyUnicode_Check(subobj))
1946                 return PyUnicode_Find(
1947                         (PyObject *)self, subobj, start, end, dir);
1948 #endif
1949         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1950                 /* XXX - the "expected a character buffer object" is pretty
1951                    confusing for a non-expert.  remap to something else ? */
1952                 return -2;
1953
1954         if (dir > 0)
1955                 return stringlib_find_slice(
1956                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1957                         sub, sub_len, start, end);
1958         else
1959                 return stringlib_rfind_slice(
1960                         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1961                         sub, sub_len, start, end);
1962 }
1963
1964
1965 PyDoc_STRVAR(find__doc__,
1966 "S.find(sub [,start [,end]]) -> int\n\
1967 \n\
1968 Return the lowest index in S where substring sub is found,\n\
1969 such that sub is contained within s[start:end].  Optional\n\
1970 arguments start and end are interpreted as in slice notation.\n\
1971 \n\
1972 Return -1 on failure.");
1973
1974 static PyObject *
1975 string_find(PyStringObject *self, PyObject *args)
1976 {
1977         Py_ssize_t result = string_find_internal(self, args, +1);
1978         if (result == -2)
1979                 return NULL;
1980         return PyInt_FromSsize_t(result);
1981 }
1982
1983
1984 PyDoc_STRVAR(index__doc__,
1985 "S.index(sub [,start [,end]]) -> int\n\
1986 \n\
1987 Like S.find() but raise ValueError when the substring is not found.");
1988
1989 static PyObject *
1990 string_index(PyStringObject *self, PyObject *args)
1991 {
1992         Py_ssize_t result = string_find_internal(self, args, +1);
1993         if (result == -2)
1994                 return NULL;
1995         if (result == -1) {
1996                 PyErr_SetString(PyExc_ValueError,
1997                                 "substring not found");
1998                 return NULL;
1999         }
2000         return PyInt_FromSsize_t(result);
2001 }
2002
2003
2004 PyDoc_STRVAR(rfind__doc__,
2005 "S.rfind(sub [,start [,end]]) -> int\n\
2006 \n\
2007 Return the highest index in S where substring sub is found,\n\
2008 such that sub is contained within s[start:end].  Optional\n\
2009 arguments start and end are interpreted as in slice notation.\n\
2010 \n\
2011 Return -1 on failure.");
2012
2013 static PyObject *
2014 string_rfind(PyStringObject *self, PyObject *args)
2015 {
2016         Py_ssize_t result = string_find_internal(self, args, -1);
2017         if (result == -2)
2018                 return NULL;
2019         return PyInt_FromSsize_t(result);
2020 }
2021
2022
2023 PyDoc_STRVAR(rindex__doc__,
2024 "S.rindex(sub [,start [,end]]) -> int\n\
2025 \n\
2026 Like S.rfind() but raise ValueError when the substring is not found.");
2027
2028 static PyObject *
2029 string_rindex(PyStringObject *self, PyObject *args)
2030 {
2031         Py_ssize_t result = string_find_internal(self, args, -1);
2032         if (result == -2)
2033                 return NULL;
2034         if (result == -1) {
2035                 PyErr_SetString(PyExc_ValueError,
2036                                 "substring not found");
2037                 return NULL;
2038         }
2039         return PyInt_FromSsize_t(result);
2040 }
2041
2042
2043 Py_LOCAL_INLINE(PyObject *)
2044 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2045 {
2046         char *s = PyString_AS_STRING(self);
2047         Py_ssize_t len = PyString_GET_SIZE(self);
2048         char *sep = PyString_AS_STRING(sepobj);
2049         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2050         Py_ssize_t i, j;
2051
2052         i = 0;
2053         if (striptype != RIGHTSTRIP) {
2054                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2055                         i++;
2056                 }
2057         }
2058
2059         j = len;
2060         if (striptype != LEFTSTRIP) {
2061                 do {
2062                         j--;
2063                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2064                 j++;
2065         }
2066
2067         if (i == 0 && j == len && PyString_CheckExact(self)) {
2068                 Py_INCREF(self);
2069                 return (PyObject*)self;
2070         }
2071         else
2072                 return PyString_FromStringAndSize(s+i, j-i);
2073 }
2074
2075
2076 Py_LOCAL_INLINE(PyObject *)
2077 do_strip(PyStringObject *self, int striptype)
2078 {
2079         char *s = PyString_AS_STRING(self);
2080         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
2081
2082         i = 0;
2083         if (striptype != RIGHTSTRIP) {
2084                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2085                         i++;
2086                 }
2087         }
2088
2089         j = len;
2090         if (striptype != LEFTSTRIP) {
2091                 do {
2092                         j--;
2093                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2094                 j++;
2095         }
2096
2097         if (i == 0 && j == len && PyString_CheckExact(self)) {
2098                 Py_INCREF(self);
2099                 return (PyObject*)self;
2100         }
2101         else
2102                 return PyString_FromStringAndSize(s+i, j-i);
2103 }
2104
2105
2106 Py_LOCAL_INLINE(PyObject *)
2107 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2108 {
2109         PyObject *sep = NULL;
2110
2111         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2112                 return NULL;
2113
2114         if (sep != NULL && sep != Py_None) {
2115                 if (PyString_Check(sep))
2116                         return do_xstrip(self, striptype, sep);
2117 #ifdef Py_USING_UNICODE
2118                 else if (PyUnicode_Check(sep)) {
2119                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2120                         PyObject *res;
2121                         if (uniself==NULL)
2122                                 return NULL;
2123                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2124                                 striptype, sep);
2125                         Py_DECREF(uniself);
2126                         return res;
2127                 }
2128 #endif
2129                 PyErr_Format(PyExc_TypeError,
2130 #ifdef Py_USING_UNICODE
2131                              "%s arg must be None, str or unicode",
2132 #else
2133                              "%s arg must be None or str",
2134 #endif
2135                              STRIPNAME(striptype));
2136                 return NULL;
2137         }
2138
2139         return do_strip(self, striptype);
2140 }
2141
2142
2143 PyDoc_STRVAR(strip__doc__,
2144 "S.strip([chars]) -> string or unicode\n\
2145 \n\
2146 Return a copy of the string S with leading and trailing\n\
2147 whitespace removed.\n\
2148 If chars is given and not None, remove characters in chars instead.\n\
2149 If chars is unicode, S will be converted to unicode before stripping");
2150
2151 static PyObject *
2152 string_strip(PyStringObject *self, PyObject *args)
2153 {
2154         if (PyTuple_GET_SIZE(args) == 0)
2155                 return do_strip(self, BOTHSTRIP); /* Common case */
2156         else
2157                 return do_argstrip(self, BOTHSTRIP, args);
2158 }
2159
2160
2161 PyDoc_STRVAR(lstrip__doc__,
2162 "S.lstrip([chars]) -> string or unicode\n\
2163 \n\
2164 Return a copy of the string S with leading whitespace removed.\n\
2165 If chars is given and not None, remove characters in chars instead.\n\
2166 If chars is unicode, S will be converted to unicode before stripping");
2167
2168 static PyObject *
2169 string_lstrip(PyStringObject *self, PyObject *args)
2170 {
2171         if (PyTuple_GET_SIZE(args) == 0)
2172                 return do_strip(self, LEFTSTRIP); /* Common case */
2173         else
2174                 return do_argstrip(self, LEFTSTRIP, args);
2175 }
2176
2177
2178 PyDoc_STRVAR(rstrip__doc__,
2179 "S.rstrip([chars]) -> string or unicode\n\
2180 \n\
2181 Return a copy of the string S with trailing whitespace removed.\n\
2182 If chars is given and not None, remove characters in chars instead.\n\
2183 If chars is unicode, S will be converted to unicode before stripping");
2184
2185 static PyObject *
2186 string_rstrip(PyStringObject *self, PyObject *args)
2187 {
2188         if (PyTuple_GET_SIZE(args) == 0)
2189                 return do_strip(self, RIGHTSTRIP); /* Common case */
2190         else
2191                 return do_argstrip(self, RIGHTSTRIP, args);
2192 }
2193
2194
2195 PyDoc_STRVAR(lower__doc__,
2196 "S.lower() -> string\n\
2197 \n\
2198 Return a copy of the string S converted to lowercase.");
2199
2200 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2201 #ifndef _tolower
2202 #define _tolower tolower
2203 #endif
2204
2205 static PyObject *
2206 string_lower(PyStringObject *self)
2207 {
2208         char *s;
2209         Py_ssize_t i, n = PyString_GET_SIZE(self);
2210         PyObject *newobj;
2211
2212         newobj = PyString_FromStringAndSize(NULL, n);
2213         if (!newobj)
2214                 return NULL;
2215
2216         s = PyString_AS_STRING(newobj);
2217
2218         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2219
2220         for (i = 0; i < n; i++) {
2221                 int c = Py_CHARMASK(s[i]);
2222                 if (isupper(c))
2223                         s[i] = _tolower(c);
2224         }
2225
2226         return newobj;
2227 }
2228
2229 PyDoc_STRVAR(upper__doc__,
2230 "S.upper() -> string\n\
2231 \n\
2232 Return a copy of the string S converted to uppercase.");
2233
2234 #ifndef _toupper
2235 #define _toupper toupper
2236 #endif
2237
2238 static PyObject *
2239 string_upper(PyStringObject *self)
2240 {
2241         char *s;
2242         Py_ssize_t i, n = PyString_GET_SIZE(self);
2243         PyObject *newobj;
2244
2245         newobj = PyString_FromStringAndSize(NULL, n);
2246         if (!newobj)
2247                 return NULL;
2248
2249         s = PyString_AS_STRING(newobj);
2250
2251         Py_MEMCPY(s, PyString_AS_STRING(self), n);
2252
2253         for (i = 0; i < n; i++) {
2254                 int c = Py_CHARMASK(s[i]);
2255                 if (islower(c))
2256                         s[i] = _toupper(c);
2257         }
2258
2259         return newobj;
2260 }
2261
2262 PyDoc_STRVAR(title__doc__,
2263 "S.title() -> string\n\
2264 \n\
2265 Return a titlecased version of S, i.e. words start with uppercase\n\
2266 characters, all remaining cased characters have lowercase.");
2267
2268 static PyObject*
2269 string_title(PyStringObject *self)
2270 {
2271         char *s = PyString_AS_STRING(self), *s_new;
2272         Py_ssize_t i, n = PyString_GET_SIZE(self);
2273         int previous_is_cased = 0;
2274         PyObject *newobj;
2275
2276         newobj = PyString_FromStringAndSize(NULL, n);
2277         if (newobj == NULL)
2278                 return NULL;
2279         s_new = PyString_AsString(newobj);
2280         for (i = 0; i < n; i++) {
2281                 int c = Py_CHARMASK(*s++);
2282                 if (islower(c)) {
2283                         if (!previous_is_cased)
2284                             c = toupper(c);
2285                         previous_is_cased = 1;
2286                 } else if (isupper(c)) {
2287                         if (previous_is_cased)
2288                             c = tolower(c);
2289                         previous_is_cased = 1;
2290                 } else
2291                         previous_is_cased = 0;
2292                 *s_new++ = c;
2293         }
2294         return newobj;
2295 }
2296
2297 PyDoc_STRVAR(capitalize__doc__,
2298 "S.capitalize() -> string\n\
2299 \n\
2300 Return a copy of the string S with only its first character\n\
2301 capitalized.");
2302
2303 static PyObject *
2304 string_capitalize(PyStringObject *self)
2305 {
2306         char *s = PyString_AS_STRING(self), *s_new;
2307         Py_ssize_t i, n = PyString_GET_SIZE(self);
2308         PyObject *newobj;
2309
2310         newobj = PyString_FromStringAndSize(NULL, n);
2311         if (newobj == NULL)
2312                 return NULL;
2313         s_new = PyString_AsString(newobj);
2314         if (0 < n) {
2315                 int c = Py_CHARMASK(*s++);
2316                 if (islower(c))
2317                         *s_new = toupper(c);
2318                 else
2319                         *s_new = c;
2320                 s_new++;
2321         }
2322         for (i = 1; i < n; i++) {
2323                 int c = Py_CHARMASK(*s++);
2324                 if (isupper(c))
2325                         *s_new = tolower(c);
2326                 else
2327                         *s_new = c;
2328                 s_new++;
2329         }
2330         return newobj;
2331 }
2332
2333
2334 PyDoc_STRVAR(count__doc__,
2335 "S.count(sub[, start[, end]]) -> int\n\
2336 \n\
2337 Return the number of non-overlapping occurrences of substring sub in\n\
2338 string S[start:end].  Optional arguments start and end are interpreted\n\
2339 as in slice notation.");
2340
2341 static PyObject *
2342 string_count(PyStringObject *self, PyObject *args)
2343 {
2344         PyObject *sub_obj;
2345         const char *str = PyString_AS_STRING(self), *sub;
2346         Py_ssize_t sub_len;
2347         Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2348
2349         if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2350                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2351                 return NULL;
2352
2353         if (PyString_Check(sub_obj)) {
2354                 sub = PyString_AS_STRING(sub_obj);
2355                 sub_len = PyString_GET_SIZE(sub_obj);
2356         }
2357 #ifdef Py_USING_UNICODE
2358         else if (PyUnicode_Check(sub_obj)) {
2359                 Py_ssize_t count;
2360                 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2361                 if (count == -1)
2362                         return NULL;
2363                 else
2364                         return PyInt_FromSsize_t(count);
2365         }
2366 #endif
2367         else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2368                 return NULL;
2369
2370         string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2371
2372         return PyInt_FromSsize_t(
2373                 stringlib_count(str + start, end - start, sub, sub_len)
2374                 );
2375 }
2376
2377 PyDoc_STRVAR(swapcase__doc__,
2378 "S.swapcase() -> string\n\
2379 \n\
2380 Return a copy of the string S with uppercase characters\n\
2381 converted to lowercase and vice versa.");
2382
2383 static PyObject *
2384 string_swapcase(PyStringObject *self)
2385 {
2386         char *s = PyString_AS_STRING(self), *s_new;
2387         Py_ssize_t i, n = PyString_GET_SIZE(self);
2388         PyObject *newobj;
2389
2390         newobj = PyString_FromStringAndSize(NULL, n);
2391         if (newobj == NULL)
2392                 return NULL;
2393         s_new = PyString_AsString(newobj);
2394         for (i = 0; i < n; i++) {
2395                 int c = Py_CHARMASK(*s++);
2396                 if (islower(c)) {
2397                         *s_new = toupper(c);
2398                 }
2399                 else if (isupper(c)) {
2400                         *s_new = tolower(c);
2401                 }
2402                 else
2403                         *s_new = c;
2404                 s_new++;
2405         }
2406         return newobj;
2407 }
2408
2409
2410 PyDoc_STRVAR(translate__doc__,
2411 "S.translate(table [,deletechars]) -> string\n\
2412 \n\
2413 Return a copy of the string S, where all characters occurring\n\
2414 in the optional argument deletechars are removed, and the\n\
2415 remaining characters have been mapped through the given\n\
2416 translation table, which must be a string of length 256.");
2417
2418 static PyObject *
2419 string_translate(PyStringObject *self, PyObject *args)
2420 {
2421         register char *input, *output;
2422         const char *table;
2423         register Py_ssize_t i, c, changed = 0;
2424         PyObject *input_obj = (PyObject*)self;
2425         const char *output_start, *del_table=NULL;
2426         Py_ssize_t inlen, tablen, dellen = 0;
2427         PyObject *result;
2428         int trans_table[256];
2429         PyObject *tableobj, *delobj = NULL;
2430
2431         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2432                               &tableobj, &delobj))
2433                 return NULL;
2434
2435         if (PyString_Check(tableobj)) {
2436                 table = PyString_AS_STRING(tableobj);
2437                 tablen = PyString_GET_SIZE(tableobj);
2438         }
2439         else if (tableobj == Py_None) {
2440                 table = NULL;
2441                 tablen = 256;
2442         }
2443 #ifdef Py_USING_UNICODE
2444         else if (PyUnicode_Check(tableobj)) {
2445                 /* Unicode .translate() does not support the deletechars
2446                    parameter; instead a mapping to None will cause characters
2447                    to be deleted. */
2448                 if (delobj != NULL) {
2449                         PyErr_SetString(PyExc_TypeError,
2450                         "deletions are implemented differently for unicode");
2451                         return NULL;
2452                 }
2453                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2454         }
2455 #endif
2456         else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2457                 return NULL;
2458
2459         if (tablen != 256) {
2460                 PyErr_SetString(PyExc_ValueError,
2461                   "translation table must be 256 characters long");
2462                 return NULL;
2463         }
2464
2465         if (delobj != NULL) {
2466                 if (PyString_Check(delobj)) {
2467                         del_table = PyString_AS_STRING(delobj);
2468                         dellen = PyString_GET_SIZE(delobj);
2469                 }
2470 #ifdef Py_USING_UNICODE
2471                 else if (PyUnicode_Check(delobj)) {
2472                         PyErr_SetString(PyExc_TypeError,
2473                         "deletions are implemented differently for unicode");
2474                         return NULL;
2475                 }
2476 #endif
2477                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2478                         return NULL;
2479         }
2480         else {
2481                 del_table = NULL;
2482                 dellen = 0;
2483         }
2484
2485         inlen = PyString_GET_SIZE(input_obj);
2486         result = PyString_FromStringAndSize((char *)NULL, inlen);
2487         if (result == NULL)
2488                 return NULL;
2489         output_start = output = PyString_AsString(result);
2490         input = PyString_AS_STRING(input_obj);
2491
2492         if (dellen == 0 && table != NULL) {
2493                 /* If no deletions are required, use faster code */
2494                 for (i = inlen; --i >= 0; ) {
2495                         c = Py_CHARMASK(*input++);
2496                         if (Py_CHARMASK((*output++ = table[c])) != c)
2497                                 changed = 1;
2498                 }
2499                 if (changed || !PyString_CheckExact(input_obj))
2500                         return result;
2501                 Py_DECREF(result);
2502                 Py_INCREF(input_obj);
2503                 return input_obj;
2504         }
2505
2506         if (table == NULL) {
2507                 for (i = 0; i < 256; i++)
2508                         trans_table[i] = Py_CHARMASK(i);
2509         } else {
2510                 for (i = 0; i < 256; i++)
2511                         trans_table[i] = Py_CHARMASK(table[i]);
2512         }
2513
2514         for (i = 0; i < dellen; i++)
2515                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2516
2517         for (i = inlen; --i >= 0; ) {
2518                 c = Py_CHARMASK(*input++);
2519                 if (trans_table[c] != -1)
2520                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2521                                 continue;
2522                 changed = 1;
2523         }
2524         if (!changed && PyString_CheckExact(input_obj)) {
2525                 Py_DECREF(result);
2526                 Py_INCREF(input_obj);
2527                 return input_obj;
2528         }
2529         /* Fix the size of the resulting string */
2530         if (inlen > 0)
2531                 _PyString_Resize(&result, output - output_start);
2532         return result;
2533 }
2534
2535
/* Search directions passed as the `direction' argument of findstring()
   and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c' within the first `target_len' bytes
   of `target'.  Evaluates to a char pointer to that byte, or NULL when the
   byte does not occur.  Every argument is parenthesized so that arbitrary
   expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2543
2544 /* String ops must return a string.  */
2545 /* If the object is subclass of string, create a copy */
2546 Py_LOCAL(PyStringObject *)
2547 return_self(PyStringObject *self)
2548 {
2549         if (PyString_CheckExact(self)) {
2550                 Py_INCREF(self);
2551                 return self;
2552         }
2553         return (PyStringObject *)PyString_FromStringAndSize(
2554                 PyString_AS_STRING(self),
2555                 PyString_GET_SIZE(self));
2556 }
2557
2558 Py_LOCAL_INLINE(Py_ssize_t)
2559 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2560 {
2561         Py_ssize_t count=0;
2562         const char *start=target;
2563         const char *end=target+target_len;
2564
2565         while ( (start=findchar(start, end-start, c)) != NULL ) {
2566                 count++;
2567                 if (count >= maxcount)
2568                         break;
2569                 start += 1;
2570         }
2571         return count;
2572 }
2573
2574 Py_LOCAL(Py_ssize_t)
2575 findstring(const char *target, Py_ssize_t target_len,
2576            const char *pattern, Py_ssize_t pattern_len,
2577            Py_ssize_t start,
2578            Py_ssize_t end,
2579            int direction)
2580 {
2581         if (start < 0) {
2582                 start += target_len;
2583                 if (start < 0)
2584                         start = 0;
2585         }
2586         if (end > target_len) {
2587                 end = target_len;
2588         } else if (end < 0) {
2589                 end += target_len;
2590                 if (end < 0)
2591                         end = 0;
2592         }
2593
2594         /* zero-length substrings always match at the first attempt */
2595         if (pattern_len == 0)
2596                 return (direction > 0) ? start : end;
2597
2598         end -= pattern_len;
2599
2600         if (direction < 0) {
2601                 for (; end >= start; end--)
2602                         if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2603                                 return end;
2604         } else {
2605                 for (; start <= end; start++)
2606                         if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2607                                 return start;
2608         }
2609         return -1;
2610 }
2611
2612 Py_LOCAL_INLINE(Py_ssize_t)
2613 countstring(const char *target, Py_ssize_t target_len,
2614             const char *pattern, Py_ssize_t pattern_len,
2615             Py_ssize_t start,
2616             Py_ssize_t end,
2617             int direction, Py_ssize_t maxcount)
2618 {
2619         Py_ssize_t count=0;
2620
2621         if (start < 0) {
2622                 start += target_len;
2623                 if (start < 0)
2624                         start = 0;
2625         }
2626         if (end > target_len) {
2627                 end = target_len;
2628         } else if (end < 0) {
2629                 end += target_len;
2630                 if (end < 0)
2631                         end = 0;
2632         }
2633
2634         /* zero-length substrings match everywhere */
2635         if (pattern_len == 0 || maxcount == 0) {
2636                 if (target_len+1 < maxcount)
2637                         return target_len+1;
2638                 return maxcount;
2639         }
2640
2641         end -= pattern_len;
2642         if (direction < 0) {
2643                 for (; (end >= start); end--)
2644                         if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2645                                 count++;
2646                                 if (--maxcount <= 0) break;
2647                                 end -= pattern_len-1;
2648                         }
2649         } else {
2650                 for (; (start <= end); start++)
2651                         if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2652                                 count++;
2653                                 if (--maxcount <= 0)
2654                                         break;
2655                                 start += pattern_len-1;
2656                         }
2657         }
2658         return count;
2659 }
2660
2661
2662 /* Algorithms for different cases of string replacement */
2663
2664 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2665 Py_LOCAL(PyStringObject *)
2666 replace_interleave(PyStringObject *self,
2667                    const char *to_s, Py_ssize_t to_len,
2668                    Py_ssize_t maxcount)
2669 {
2670         char *self_s, *result_s;
2671         Py_ssize_t self_len, result_len;
2672         Py_ssize_t count, i, product;
2673         PyStringObject *result;
2674
2675         self_len = PyString_GET_SIZE(self);
2676
2677         /* 1 at the end plus 1 after every character */
2678         count = self_len+1;
2679         if (maxcount < count)
2680                 count = maxcount;
2681
2682         /* Check for overflow */
2683         /*   result_len = count * to_len + self_len; */
2684         product = count * to_len;
2685         if (product / to_len != count) {
2686                 PyErr_SetString(PyExc_OverflowError,
2687                                 "replace string is too long");
2688                 return NULL;
2689         }
2690         result_len = product + self_len;
2691         if (result_len < 0) {
2692                 PyErr_SetString(PyExc_OverflowError,
2693                                 "replace string is too long");
2694                 return NULL;
2695         }
2696
2697         if (! (result = (PyStringObject *)
2698                          PyString_FromStringAndSize(NULL, result_len)) )
2699                 return NULL;
2700
2701         self_s = PyString_AS_STRING(self);
2702         result_s = PyString_AS_STRING(result);
2703
2704         /* TODO: special case single character, which doesn't need memcpy */
2705
2706         /* Lay the first one down (guaranteed this will occur) */
2707         Py_MEMCPY(result_s, to_s, to_len);
2708         result_s += to_len;
2709         count -= 1;
2710
2711         for (i=0; i<count; i++) {
2712                 *result_s++ = *self_s++;
2713                 Py_MEMCPY(result_s, to_s, to_len);
2714                 result_s += to_len;
2715         }
2716
2717         /* Copy the rest of the original string */
2718         Py_MEMCPY(result_s, self_s, self_len-i);
2719
2720         return result;
2721 }
2722
2723 /* Special case for deleting a single character */
2724 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2725 Py_LOCAL(PyStringObject *)
2726 replace_delete_single_character(PyStringObject *self,
2727                                 char from_c, Py_ssize_t maxcount)
2728 {
2729         char *self_s, *result_s;
2730         char *start, *next, *end;
2731         Py_ssize_t self_len, result_len;
2732         Py_ssize_t count;
2733         PyStringObject *result;
2734
2735         self_len = PyString_GET_SIZE(self);
2736         self_s = PyString_AS_STRING(self);
2737
2738         count = countchar(self_s, self_len, from_c, maxcount);
2739         if (count == 0) {
2740                 return return_self(self);
2741         }
2742
2743         result_len = self_len - count;  /* from_len == 1 */
2744         assert(result_len>=0);
2745
2746         if ( (result = (PyStringObject *)
2747                         PyString_FromStringAndSize(NULL, result_len)) == NULL)
2748                 return NULL;
2749         result_s = PyString_AS_STRING(result);
2750
2751         start = self_s;
2752         end = self_s + self_len;
2753         while (count-- > 0) {
2754                 next = findchar(start, end-start, from_c);
2755                 if (next == NULL)
2756                         break;
2757                 Py_MEMCPY(result_s, start, next-start);
2758                 result_s += (next-start);
2759                 start = next+1;
2760         }
2761         Py_MEMCPY(result_s, start, end-start);
2762
2763         return result;
2764 }
2765
2766 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2767
2768 Py_LOCAL(PyStringObject *)
2769 replace_delete_substring(PyStringObject *self,
2770                          const char *from_s, Py_ssize_t from_len,
2771                          Py_ssize_t maxcount) {
2772         char *self_s, *result_s;
2773         char *start, *next, *end;
2774         Py_ssize_t self_len, result_len;
2775         Py_ssize_t count, offset;
2776         PyStringObject *result;
2777
2778         self_len = PyString_GET_SIZE(self);
2779         self_s = PyString_AS_STRING(self);
2780
2781         count = countstring(self_s, self_len,
2782                             from_s, from_len,
2783                             0, self_len, 1,
2784                             maxcount);
2785
2786         if (count == 0) {
2787                 /* no matches */
2788                 return return_self(self);
2789         }
2790
2791         result_len = self_len - (count * from_len);
2792         assert (result_len>=0);
2793
2794         if ( (result = (PyStringObject *)
2795               PyString_FromStringAndSize(NULL, result_len)) == NULL )
2796                 return NULL;
2797
2798         result_s = PyString_AS_STRING(result);
2799
2800         start = self_s;
2801         end = self_s + self_len;
2802         while (count-- > 0) {
2803                 offset = findstring(start, end-start,
2804                                     from_s, from_len,
2805                                     0, end-start, FORWARD);
2806                 if (offset == -1)
2807                         break;
2808                 next = start + offset;
2809
2810                 Py_MEMCPY(result_s, start, next-start);
2811
2812                 result_s += (next-start);
2813                 start = next+from_len;
2814         }
2815         Py_MEMCPY(result_s, start, end-start);
2816         return result;
2817 }
2818
2819 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2820 Py_LOCAL(PyStringObject *)
2821 replace_single_character_in_place(PyStringObject *self,
2822                                   char from_c, char to_c,
2823                                   Py_ssize_t maxcount)
2824 {
2825         char *self_s, *result_s, *start, *end, *next;
2826         Py_ssize_t self_len;
2827         PyStringObject *result;
2828
2829         /* The result string will be the same size */
2830         self_s = PyString_AS_STRING(self);
2831         self_len = PyString_GET_SIZE(self);
2832
2833         next = findchar(self_s, self_len, from_c);
2834
2835         if (next == NULL) {
2836                 /* No matches; return the original string */
2837                 return return_self(self);
2838         }
2839
2840         /* Need to make a new string */
2841         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2842         if (result == NULL)
2843                 return NULL;
2844         result_s = PyString_AS_STRING(result);
2845         Py_MEMCPY(result_s, self_s, self_len);
2846
2847         /* change everything in-place, starting with this one */
2848         start =  result_s + (next-self_s);
2849         *start = to_c;
2850         start++;
2851         end = result_s + self_len;
2852
2853         while (--maxcount > 0) {
2854                 next = findchar(start, end-start, from_c);
2855                 if (next == NULL)
2856                         break;
2857                 *next = to_c;
2858                 start = next+1;
2859         }
2860
2861         return result;
2862 }
2863
2864 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2865 Py_LOCAL(PyStringObject *)
2866 replace_substring_in_place(PyStringObject *self,
2867                            const char *from_s, Py_ssize_t from_len,
2868                            const char *to_s, Py_ssize_t to_len,
2869                            Py_ssize_t maxcount)
2870 {
2871         char *result_s, *start, *end;
2872         char *self_s;
2873         Py_ssize_t self_len, offset;
2874         PyStringObject *result;
2875
2876         /* The result string will be the same size */
2877
2878         self_s = PyString_AS_STRING(self);
2879         self_len = PyString_GET_SIZE(self);
2880
2881         offset = findstring(self_s, self_len,
2882                             from_s, from_len,
2883                             0, self_len, FORWARD);
2884         if (offset == -1) {
2885                 /* No matches; return the original string */
2886                 return return_self(self);
2887         }
2888
2889         /* Need to make a new string */
2890         result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2891         if (result == NULL)
2892                 return NULL;
2893         result_s = PyString_AS_STRING(result);
2894         Py_MEMCPY(result_s, self_s, self_len);
2895
2896         /* change everything in-place, starting with this one */
2897         start =  result_s + offset;
2898         Py_MEMCPY(start, to_s, from_len);
2899         start += from_len;
2900         end = result_s + self_len;
2901
2902         while ( --maxcount > 0) {
2903                 offset = findstring(start, end-start,
2904                                     from_s, from_len,
2905                                     0, end-start, FORWARD);
2906                 if (offset==-1)
2907                         break;
2908                 Py_MEMCPY(start+offset, to_s, from_len);
2909                 start += offset+from_len;
2910         }
2911
2912         return result;
2913 }
2914
2915 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2916 Py_LOCAL(PyStringObject *)
2917 replace_single_character(PyStringObject *self,
2918                          char from_c,
2919                          const char *to_s, Py_ssize_t to_len,
2920                          Py_ssize_t maxcount)
2921 {
2922         char *self_s, *result_s;
2923         char *start, *next, *end;
2924         Py_ssize_t self_len, result_len;
2925         Py_ssize_t count, product;
2926         PyStringObject *result;
2927
2928         self_s = PyString_AS_STRING(self);
2929         self_len = PyString_GET_SIZE(self);
2930
2931         count = countchar(self_s, self_len, from_c, maxcount);
2932         if (count == 0) {
2933                 /* no matches, return unchanged */
2934                 return return_self(self);
2935         }
2936
2937         /* use the difference between current and new, hence the "-1" */
2938         /*   result_len = self_len + count * (to_len-1)  */
2939         product = count * (to_len-1);
2940         if (product / (to_len-1) != count) {
2941                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942                 return NULL;
2943         }
2944         result_len = self_len + product;
2945         if (result_len < 0) {
2946                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2947                 return NULL;
2948         }
2949
2950         if ( (result = (PyStringObject *)
2951               PyString_FromStringAndSize(NULL, result_len)) == NULL)
2952                 return NULL;
2953         result_s = PyString_AS_STRING(result);
2954
2955         start = self_s;
2956         end = self_s + self_len;
2957         while (count-- > 0) {
2958                 next = findchar(start, end-start, from_c);
2959                 if (next == NULL)
2960                         break;
2961
2962                 if (next == start) {
2963                         /* replace with the 'to' */
2964                         Py_MEMCPY(result_s, to_s, to_len);
2965                         result_s += to_len;
2966                         start += 1;
2967                 } else {
2968                         /* copy the unchanged old then the 'to' */
2969                         Py_MEMCPY(result_s, start, next-start);
2970                         result_s += (next-start);
2971                         Py_MEMCPY(result_s, to_s, to_len);
2972                         result_s += to_len;
2973                         start = next+1;
2974                 }
2975         }
2976         /* Copy the remainder of the remaining string */
2977         Py_MEMCPY(result_s, start, end-start);
2978
2979         return result;
2980 }
2981
2982 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2983 Py_LOCAL(PyStringObject *)
2984 replace_substring(PyStringObject *self,
2985                   const char *from_s, Py_ssize_t from_len,
2986                   const char *to_s, Py_ssize_t to_len,
2987                   Py_ssize_t maxcount) {
2988         char *self_s, *result_s;
2989         char *start, *next, *end;
2990         Py_ssize_t self_len, result_len;
2991         Py_ssize_t count, offset, product;
2992         PyStringObject *result;
2993
2994         self_s = PyString_AS_STRING(self);
2995         self_len = PyString_GET_SIZE(self);
2996
2997         count = countstring(self_s, self_len,
2998                             from_s, from_len,
2999                             0, self_len, FORWARD, maxcount);
3000         if (count == 0) {
3001                 /* no matches, return unchanged */
3002                 return return_self(self);
3003         }
3004
3005         /* Check for overflow */
3006         /*    result_len = self_len + count * (to_len-from_len) */
3007         product = count * (to_len-from_len);
3008         if (product / (to_len-from_len) != count) {
3009                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3010                 return NULL;
3011         }
3012         result_len = self_len + product;
3013         if (result_len < 0) {
3014                 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3015                 return NULL;
3016         }
3017
3018         if ( (result = (PyStringObject *)
3019               PyString_FromStringAndSize(NULL, result_len)) == NULL)
3020                 return NULL;
3021         result_s = PyString_AS_STRING(result);
3022
3023         start = self_s;
3024         end = self_s + self_len;
3025         while (count-- > 0) {
3026                 offset = findstring(start, end-start,
3027                                     from_s, from_len,
3028                                     0, end-start, FORWARD);
3029                 if (offset == -1)
3030                         break;
3031                 next = start+offset;
3032                 if (next == start) {
3033                         /* replace with the 'to' */
3034                         Py_MEMCPY(result_s, to_s, to_len);
3035                         result_s += to_len;
3036                         start += from_len;
3037                 } else {
3038                         /* copy the unchanged old then the 'to' */
3039                         Py_MEMCPY(result_s, start, next-start);
3040                         result_s += (next-start);
3041                         Py_MEMCPY(result_s, to_s, to_len);
3042                         result_s += to_len;
3043                         start = next+from_len;
3044                 }
3045         }
3046         /* Copy the remainder of the remaining string */
3047         Py_MEMCPY(result_s, start, end-start);
3048
3049         return result;
3050 }
3051
3052
3053 Py_LOCAL(PyStringObject *)
3054 replace(PyStringObject *self,
3055         const char *from_s, Py_ssize_t from_len,
3056         const char *to_s, Py_ssize_t to_len,
3057         Py_ssize_t maxcount)
3058 {
3059         if (maxcount < 0) {
3060                 maxcount = PY_SSIZE_T_MAX;
3061         } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3062                 /* nothing to do; return the original string */
3063                 return return_self(self);
3064         }
3065
3066         if (maxcount == 0 ||
3067             (from_len == 0 && to_len == 0)) {
3068                 /* nothing to do; return the original string */
3069                 return return_self(self);
3070         }
3071
3072         /* Handle zero-length special cases */
3073
3074         if (from_len == 0) {
3075                 /* insert the 'to' string everywhere.   */
3076                 /*    >>> "Python".replace("", ".")     */
3077                 /*    '.P.y.t.h.o.n.'                   */
3078                 return replace_interleave(self, to_s, to_len, maxcount);
3079         }
3080
3081         /* Except for "".replace("", "A") == "A" there is no way beyond this */
3082         /* point for an empty self string to generate a non-empty string */
3083         /* Special case so the remaining code always gets a non-empty string */
3084         if (PyString_GET_SIZE(self) == 0) {
3085                 return return_self(self);
3086         }
3087
3088         if (to_len == 0) {
3089                 /* delete all occurances of 'from' string */
3090                 if (from_len == 1) {
3091                         return replace_delete_single_character(
3092                                 self, from_s[0], maxcount);
3093                 } else {
3094                         return replace_delete_substring(self, from_s, from_len, maxcount);
3095                 }
3096         }
3097
3098         /* Handle special case where both strings have the same length */
3099
3100         if (from_len == to_len) {
3101                 if (from_len == 1) {
3102                         return replace_single_character_in_place(
3103                                 self,
3104                                 from_s[0],
3105                                 to_s[0],
3106                                 maxcount);
3107                 } else {
3108                         return replace_substring_in_place(
3109                                 self, from_s, from_len, to_s, to_len, maxcount);
3110                 }
3111         }
3112
3113         /* Otherwise use the more generic algorithms */
3114         if (from_len == 1) {
3115                 return replace_single_character(self, from_s[0],
3116                                                 to_s, to_len, maxcount);
3117         } else {
3118                 /* len('from')>=2, len('to')>=1 */
3119                 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3120         }
3121 }
3122
3123 PyDoc_STRVAR(replace__doc__,
3124 "S.replace (old, new[, count]) -> string\n\
3125 \n\
3126 Return a copy of string S with all occurrences of substring\n\
3127 old replaced by new.  If the optional argument count is\n\
3128 given, only the first count occurrences are replaced.");
3129
3130 static PyObject *
3131 string_replace(PyStringObject *self, PyObject *args)
3132 {
3133         Py_ssize_t count = -1;
3134         PyObject *from, *to;
3135         const char *from_s, *to_s;
3136         Py_ssize_t from_len, to_len;
3137
3138         if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3139                 return NULL;
3140
3141         if (PyString_Check(from)) {
3142                 from_s = PyString_AS_STRING(from);
3143                 from_len = PyString_GET_SIZE(from);
3144         }
3145 #ifdef Py_USING_UNICODE
3146         if (PyUnicode_Check(from))
3147                 return PyUnicode_Replace((PyObject *)self,
3148                                          from, to, count);
3149 #endif
3150         else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3151                 return NULL;
3152
3153         if (PyString_Check(to)) {
3154                 to_s = PyString_AS_STRING(to);
3155                 to_len = PyString_GET_SIZE(to);
3156         }
3157 #ifdef Py_USING_UNICODE
3158         else if (PyUnicode_Check(to))
3159                 return PyUnicode_Replace((PyObject *)self,
3160                                          from, to, count);
3161 #endif
3162         else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3163                 return NULL;
3164
3165         return (PyObject *)replace((PyStringObject *) self,
3166                                    from_s, from_len,
3167                                    to_s, to_len, count);
3168 }
3169
3170 /** End DALKE **/
3171
3172 /* Matches the end (direction >= 0) or start (direction < 0) of self
3173  * against substr, using the start and end arguments. Returns
3174  * -1 on error, 0 if not found and 1 if found.
3175  */
3176 Py_LOCAL(int)
3177 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3178                   Py_ssize_t end, int direction)
3179 {
3180         Py_ssize_t len = PyString_GET_SIZE(self);
3181         Py_ssize_t slen;
3182         const char* sub;
3183         const char* str;
3184
3185         if (PyString_Check(substr)) {
3186                 sub = PyString_AS_STRING(substr);
3187                 slen = PyString_GET_SIZE(substr);
3188         }
3189 #ifdef Py_USING_UNICODE
3190         else if (PyUnicode_Check(substr))
3191                 return PyUnicode_Tailmatch((PyObject *)self,
3192                                            substr, start, end, direction);
3193 #endif
3194         else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3195                 return -1;
3196         str = PyString_AS_STRING(self);
3197
3198         string_adjust_indices(&start, &end, len);
3199
3200         if (direction < 0) {
3201                 /* startswith */
3202                 if (start+slen > len)
3203                         return 0;
3204         } else {
3205                 /* endswith */
3206                 if (end-start < slen || start > len)
3207                         return 0;
3208
3209                 if (end-slen > start)
3210                         start = end - slen;
3211         }
3212         if (end-start >= slen)
3213                 return ! memcmp(str+start, sub, slen);
3214         return 0;
3215 }
3216
3217
3218 PyDoc_STRVAR(startswith__doc__,
3219 "S.startswith(prefix[, start[, end]]) -> bool\n\
3220 \n\
3221 Return True if S starts with the specified prefix, False otherwise.\n\
3222 With optional start, test S beginning at that position.\n\
3223 With optional end, stop comparing S at that position.\n\
3224 prefix can also be a tuple of strings to try.");
3225
3226 static PyObject *
3227 string_startswith(PyStringObject *self, PyObject *args)
3228 {
3229         Py_ssize_t start = 0;
3230         Py_ssize_t end = PY_SSIZE_T_MAX;
3231         PyObject *subobj;
3232         int result;
3233
3234         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3235                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3236                 return NULL;
3237         if (PyTuple_Check(subobj)) {
3238                 Py_ssize_t i;
3239                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3240                         result = _string_tailmatch(self,
3241                                         PyTuple_GET_ITEM(subobj, i),
3242                                         start, end, -1);
3243                         if (result == -1)
3244                                 return NULL;
3245                         else if (result) {
3246                                 Py_RETURN_TRUE;
3247                         }
3248                 }
3249                 Py_RETURN_FALSE;
3250         }
3251         result = _string_tailmatch(self, subobj, start, end, -1);
3252         if (result == -1)
3253                 return NULL;
3254         else
3255                 return PyBool_FromLong(result);
3256 }
3257
3258
3259 PyDoc_STRVAR(endswith__doc__,
3260 "S.endswith(suffix[, start[, end]]) -> bool\n\
3261 \n\
3262 Return True if S ends with the specified suffix, False otherwise.\n\
3263 With optional start, test S beginning at that position.\n\
3264 With optional end, stop comparing S at that position.\n\
3265 suffix can also be a tuple of strings to try.");
3266
3267 static PyObject *
3268 string_endswith(PyStringObject *self, PyObject *args)
3269 {
3270         Py_ssize_t start = 0;
3271         Py_ssize_t end = PY_SSIZE_T_MAX;
3272         PyObject *subobj;
3273         int result;
3274
3275         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3276                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3277                 return NULL;
3278         if (PyTuple_Check(subobj)) {
3279                 Py_ssize_t i;
3280                 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3281                         result = _string_tailmatch(self,
3282                                         PyTuple_GET_ITEM(subobj, i),
3283                                         start, end, +1);
3284                         if (result == -1)
3285                                 return NULL;
3286                         else if (result) {
3287                                 Py_RETURN_TRUE;
3288                         }
3289                 }
3290                 Py_RETURN_FALSE;
3291         }
3292         result = _string_tailmatch(self, subobj, start, end, +1);
3293         if (result == -1)
3294                 return NULL;
3295         else
3296                 return PyBool_FromLong(result);
3297 }
3298
3299
3300 PyDoc_STRVAR(encode__doc__,
3301 "S.encode([encoding[,errors]]) -> object\n\
3302 \n\
3303 Encodes S using the codec registered for encoding. encoding defaults\n\
3304 to the default encoding. errors may be given to set a different error\n\
3305 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3306 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3307 'xmlcharrefreplace' as well as any other name registered with\n\
3308 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3309
3310 static PyObject *
3311 string_encode(PyStringObject *self, PyObject *args)
3312 {
3313     char *encoding = NULL;
3314     char *errors = NULL;
3315     PyObject *v;
3316
3317     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3318         return NULL;
3319     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3320     if (v == NULL)
3321         goto onError;
3322     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3323         PyErr_Format(PyExc_TypeError,
3324                      "encoder did not return a string/unicode object "
3325                      "(type=%.400s)",
3326                      Py_TYPE(v)->tp_name);
3327         Py_DECREF(v);
3328         return NULL;
3329     }
3330     return v;
3331
3332  onError:
3333     return NULL;
3334 }
3335
3336
3337 PyDoc_STRVAR(decode__doc__,
3338 "S.decode([encoding[,errors]]) -> object\n\
3339 \n\
3340 Decodes S using the codec registered for encoding. encoding defaults\n\
3341 to the default encoding. errors may be given to set a different error\n\
3342 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3343 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3344 as well as any other name registerd with codecs.register_error that is\n\
3345 able to handle UnicodeDecodeErrors.");
3346
3347 static PyObject *
3348 string_decode(PyStringObject *self, PyObject *args)
3349 {
3350     char *encoding = NULL;
3351     char *errors = NULL;
3352     PyObject *v;
3353
3354     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3355         return NULL;
3356     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3357     if (v == NULL)
3358         goto onError;
3359     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3360         PyErr_Format(PyExc_TypeError,
3361                      "decoder did not return a string/unicode object "
3362                      "(type=%.400s)",
3363                      Py_TYPE(v)->tp_name);
3364         Py_DECREF(v);
3365         return NULL;
3366     }
3367     return v;
3368
3369  onError:
3370     return NULL;
3371 }
3372
3373
3374 PyDoc_STRVAR(expandtabs__doc__,
3375 "S.expandtabs([tabsize]) -> string\n\
3376 \n\
3377 Return a copy of S where all tab characters are expanded using spaces.\n\
3378 If tabsize is not given, a tab size of 8 characters is assumed.");
3379
3380 static PyObject*
3381 string_expandtabs(PyStringObject *self, PyObject *args)
3382 {
3383     const char *e, *p, *qe;
3384     char *q;
3385     Py_ssize_t i, j, incr;
3386     PyObject *u;
3387     int tabsize = 8;
3388
3389     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3390         return NULL;
3391
3392     /* First pass: determine size of output string */
3393     i = 0; /* chars up to and including most recent \n or \r */
3394     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3395     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3396     for (p = PyString_AS_STRING(self); p < e; p++)
3397         if (*p == '\t') {
3398             if (tabsize > 0) {
3399                 incr = tabsize - (j % tabsize);
3400                 if (j > PY_SSIZE_T_MAX - incr)
3401                     goto overflow1;
3402                 j += incr;
3403             }
3404         }
3405         else {
3406             if (j > PY_SSIZE_T_MAX - 1)
3407                 goto overflow1;
3408             j++;
3409             if (*p == '\n' || *p == '\r') {
3410                 if (i > PY_SSIZE_T_MAX - j)
3411                     goto overflow1;
3412                 i += j;
3413                 j = 0;
3414             }
3415         }
3416
3417     if (i > PY_SSIZE_T_MAX - j)
3418         goto overflow1;
3419
3420     /* Second pass: create output string and fill it */
3421     u = PyString_FromStringAndSize(NULL, i + j);
3422     if (!u)
3423         return NULL;
3424
3425     j = 0; /* same as in first pass */
3426     q = PyString_AS_STRING(u); /* next output char */
3427     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3428
3429     for (p = PyString_AS_STRING(self); p < e; p++)
3430         if (*p == '\t') {
3431             if (tabsize > 0) {
3432                 i = tabsize - (j % tabsize);
3433                 j += i;
3434                 while (i--) {
3435                     if (q >= qe)
3436                         goto overflow2;
3437                     *q++ = ' ';
3438                 }
3439             }
3440         }
3441         else {
3442             if (q >= qe)
3443                 goto overflow2;
3444             *q++ = *p;
3445             j++;
3446             if (*p == '\n' || *p == '\r')
3447                 j = 0;
3448         }
3449
3450     return u;
3451
3452   overflow2:
3453     Py_DECREF(u);
3454   overflow1:
3455     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3456     return NULL;
3457 }
3458
3459 Py_LOCAL_INLINE(PyObject *)
3460 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3461 {
3462     PyObject *u;
3463
3464     if (left < 0)
3465         left = 0;
3466     if (right < 0)
3467         right = 0;
3468
3469     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3470         Py_INCREF(self);
3471         return (PyObject *)self;
3472     }
3473
3474     u = PyString_FromStringAndSize(NULL,
3475                                    left + PyString_GET_SIZE(self) + right);
3476     if (u) {
3477         if (left)
3478             memset(PyString_AS_STRING(u), fill, left);
3479         Py_MEMCPY(PyString_AS_STRING(u) + left,
3480                PyString_AS_STRING(self),
3481                PyString_GET_SIZE(self));
3482         if (right)
3483             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3484                    fill, right);
3485     }
3486
3487     return u;
3488 }
3489
3490 PyDoc_STRVAR(ljust__doc__,
3491 "S.ljust(width[, fillchar]) -> string\n"
3492 "\n"
3493 "Return S left justified in a string of length width. Padding is\n"
3494 "done using the specified fill character (default is a space).");
3495
3496 static PyObject *
3497 string_ljust(PyStringObject *self, PyObject *args)
3498 {
3499     Py_ssize_t width;
3500     char fillchar = ' ';
3501
3502     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3503         return NULL;
3504
3505     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3506         Py_INCREF(self);
3507         return (PyObject*) self;
3508     }
3509
3510     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3511 }
3512
3513
3514 PyDoc_STRVAR(rjust__doc__,
3515 "S.rjust(width[, fillchar]) -> string\n"
3516 "\n"
3517 "Return S right justified in a string of length width. Padding is\n"
3518 "done using the specified fill character (default is a space)");
3519
3520 static PyObject *
3521 string_rjust(PyStringObject *self, PyObject *args)
3522 {
3523     Py_ssize_t width;
3524     char fillchar = ' ';
3525
3526     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3527         return NULL;
3528
3529     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3530         Py_INCREF(self);
3531         return (PyObject*) self;
3532     }
3533
3534     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3535 }
3536
3537
3538 PyDoc_STRVAR(center__doc__,
3539 "S.center(width[, fillchar]) -> string\n"
3540 "\n"
3541 "Return S centered in a string of length width. Padding is\n"
3542 "done using the specified fill character (default is a space)");
3543
3544 static PyObject *
3545 string_center(PyStringObject *self, PyObject *args)
3546 {
3547     Py_ssize_t marg, left;
3548     Py_ssize_t width;
3549     char fillchar = ' ';
3550
3551     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3552         return NULL;
3553
3554     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3555         Py_INCREF(self);
3556         return (PyObject*) self;
3557     }
3558
3559     marg = width - PyString_GET_SIZE(self);
3560     left = marg / 2 + (marg & width & 1);
3561
3562     return pad(self, left, marg - left, fillchar);
3563 }
3564
3565 PyDoc_STRVAR(zfill__doc__,
3566 "S.zfill(width) -> string\n"
3567 "\n"
3568 "Pad a numeric string S with zeros on the left, to fill a field\n"
3569 "of the specified width.  The string S is never truncated.");
3570
3571 static PyObject *
3572 string_zfill(PyStringObject *self, PyObject *args)
3573 {
3574     Py_ssize_t fill;
3575     PyObject *s;
3576     char *p;
3577     Py_ssize_t width;
3578
3579     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3580         return NULL;
3581
3582     if (PyString_GET_SIZE(self) >= width) {
3583         if (PyString_CheckExact(self)) {
3584             Py_INCREF(self);
3585             return (PyObject*) self;
3586         }
3587         else
3588             return PyString_FromStringAndSize(
3589                 PyString_AS_STRING(self),
3590                 PyString_GET_SIZE(self)
3591             );
3592     }
3593
3594     fill = width - PyString_GET_SIZE(self);
3595
3596     s = pad(self, fill, 0, '0');
3597
3598     if (s == NULL)
3599         return NULL;
3600
3601     p = PyString_AS_STRING(s);
3602     if (p[fill] == '+' || p[fill] == '-') {
3603         /* move sign to beginning of string */
3604         p[0] = p[fill];
3605         p[fill] = '0';
3606     }
3607
3608     return (PyObject*) s;
3609 }
3610
3611 PyDoc_STRVAR(isspace__doc__,
3612 "S.isspace() -> bool\n\
3613 \n\
3614 Return True if all characters in S are whitespace\n\
3615 and there is at least one character in S, False otherwise.");
3616
3617 static PyObject*
3618 string_isspace(PyStringObject *self)
3619 {
3620     register const unsigned char *p
3621         = (unsigned char *) PyString_AS_STRING(self);
3622     register const unsigned char *e;
3623
3624     /* Shortcut for single character strings */
3625     if (PyString_GET_SIZE(self) == 1 &&
3626         isspace(*p))
3627         return PyBool_FromLong(1);
3628
3629     /* Special case for empty strings */
3630     if (PyString_GET_SIZE(self) == 0)
3631         return PyBool_FromLong(0);
3632
3633     e = p + PyString_GET_SIZE(self);
3634     for (; p < e; p++) {
3635         if (!isspace(*p))
3636             return PyBool_FromLong(0);
3637     }
3638     return PyBool_FromLong(1);
3639 }
3640
3641
3642 PyDoc_STRVAR(isalpha__doc__,
3643 "S.isalpha() -> bool\n\
3644 \n\
3645 Return True if all characters in S are alphabetic\n\
3646 and there is at least one character in S, False otherwise.");
3647
3648 static PyObject*
3649 string_isalpha(PyStringObject *self)
3650 {
3651     register const unsigned char *p
3652         = (unsigned char *) PyString_AS_STRING(self);
3653     register const unsigned char *e;
3654
3655     /* Shortcut for single character strings */
3656     if (PyString_GET_SIZE(self) == 1 &&
3657         isalpha(*p))
3658         return PyBool_FromLong(1);
3659
3660     /* Special case for empty strings */
3661     if (PyString_GET_SIZE(self) == 0)
3662         return PyBool_FromLong(0);
3663
3664     e = p + PyString_GET_SIZE(self);
3665     for (; p < e; p++) {
3666         if (!isalpha(*p))
3667             return PyBool_FromLong(0);
3668     }
3669     return PyBool_FromLong(1);
3670 }
3671
3672
3673 PyDoc_STRVAR(isalnum__doc__,
3674 "S.isalnum() -> bool\n\
3675 \n\
3676 Return True if all characters in S are alphanumeric\n\
3677 and there is at least one character in S, False otherwise.");
3678
3679 static PyObject*
3680 string_isalnum(PyStringObject *self)
3681 {
3682     register const unsigned char *p
3683         = (unsigned char *) PyString_AS_STRING(self);
3684     register const unsigned char *e;
3685
3686     /* Shortcut for single character strings */
3687     if (PyString_GET_SIZE(self) == 1 &&
3688         isalnum(*p))
3689         return PyBool_FromLong(1);
3690
3691     /* Special case for empty strings */
3692     if (PyString_GET_SIZE(self) == 0)
3693         return PyBool_FromLong(0);
3694
3695     e = p + PyString_GET_SIZE(self);
3696     for (; p < e; p++) {
3697         if (!isalnum(*p))
3698             return PyBool_FromLong(0);
3699     }
3700     return PyBool_FromLong(1);
3701 }
3702
3703
3704 PyDoc_STRVAR(isdigit__doc__,
3705 "S.isdigit() -> bool\n\
3706 \n\
3707 Return True if all characters in S are digits\n\
3708 and there is at least one character in S, False otherwise.");
3709
3710 static PyObject*
3711 string_isdigit(PyStringObject *self)
3712 {
3713     register const unsigned char *p
3714         = (unsigned char *) PyString_AS_STRING(self);
3715     register const unsigned char *e;
3716
3717     /* Shortcut for single character strings */
3718     if (PyString_GET_SIZE(self) == 1 &&
3719         isdigit(*p))
3720         return PyBool_FromLong(1);
3721
3722     /* Special case for empty strings */
3723     if (PyString_GET_SIZE(self) == 0)
3724         return PyBool_FromLong(0);
3725
3726     e = p + PyString_GET_SIZE(self);
3727     for (; p < e; p++) {
3728         if (!isdigit(*p))
3729             return PyBool_FromLong(0);
3730     }
3731     return PyBool_FromLong(1);
3732 }
3733
3734
3735 PyDoc_STRVAR(islower__doc__,
3736 "S.islower() -> bool\n\
3737 \n\
3738 Return True if all cased characters in S are lowercase and there is\n\
3739 at least one cased character in S, False otherwise.");
3740
3741 static PyObject*
3742 string_islower(PyStringObject *self)
3743 {
3744     register const unsigned char *p
3745         = (unsigned char *) PyString_AS_STRING(self);
3746     register const unsigned char *e;
3747     int cased;
3748
3749     /* Shortcut for single character strings */
3750     if (PyString_GET_SIZE(self) == 1)
3751         return PyBool_FromLong(islower(*p) != 0);
3752
3753     /* Special case for empty strings */
3754     if (PyString_GET_SIZE(self) == 0)
3755         return PyBool_FromLong(0);
3756
3757     e = p + PyString_GET_SIZE(self);
3758     cased = 0;
3759     for (; p < e; p++) {
3760         if (isupper(*p))
3761             return PyBool_FromLong(0);
3762         else if (!cased && islower(*p))
3763             cased = 1;
3764     }
3765     return PyBool_FromLong(cased);
3766 }
3767
3768
3769 PyDoc_STRVAR(isupper__doc__,
3770 "S.isupper() -> bool\n\
3771 \n\
3772 Return True if all cased characters in S are uppercase and there is\n\
3773 at least one cased character in S, False otherwise.");
3774
3775 static PyObject*
3776 string_isupper(PyStringObject *self)
3777 {
3778     register const unsigned char *p
3779         = (unsigned char *) PyString_AS_STRING(self);
3780     register const unsigned char *e;
3781     int cased;
3782
3783     /* Shortcut for single character strings */
3784     if (PyString_GET_SIZE(self) == 1)
3785         return PyBool_FromLong(isupper(*p) != 0);
3786
3787     /* Special case for empty strings */
3788     if (PyString_GET_SIZE(self) == 0)
3789         return PyBool_FromLong(0);
3790
3791     e = p + PyString_GET_SIZE(self);
3792     cased = 0;
3793     for (; p < e; p++) {
3794         if (islower(*p))
3795             return PyBool_FromLong(0);
3796         else if (!cased && isupper(*p))
3797             cased = 1;
3798     }
3799     return PyBool_FromLong(cased);
3800 }
3801
3802
3803 PyDoc_STRVAR(istitle__doc__,
3804 "S.istitle() -> bool\n\
3805 \n\
3806 Return True if S is a titlecased string and there is at least one\n\
3807 character in S, i.e. uppercase characters may only follow uncased\n\
3808 characters and lowercase characters only cased ones. Return False\n\
3809 otherwise.");
3810
3811 static PyObject*
3812 string_istitle(PyStringObject *self, PyObject *uncased)
3813 {
3814     register const unsigned char *p
3815         = (unsigned char *) PyString_AS_STRING(self);
3816     register const unsigned char *e;
3817     int cased, previous_is_cased;
3818
3819     /* Shortcut for single character strings */
3820     if (PyString_GET_SIZE(self) == 1)
3821         return PyBool_FromLong(isupper(*p) != 0);
3822
3823     /* Special case for empty strings */
3824     if (PyString_GET_SIZE(self) == 0)
3825         return PyBool_FromLong(0);
3826
3827     e = p + PyString_GET_SIZE(self);
3828     cased = 0;
3829     previous_is_cased = 0;
3830     for (; p < e; p++) {
3831         register const unsigned char ch = *p;
3832
3833         if (isupper(ch)) {
3834             if (previous_is_cased)
3835                 return PyBool_FromLong(0);
3836             previous_is_cased = 1;
3837             cased = 1;
3838         }
3839         else if (islower(ch)) {
3840             if (!previous_is_cased)
3841                 return PyBool_FromLong(0);
3842             previous_is_cased = 1;
3843             cased = 1;
3844         }
3845         else
3846             previous_is_cased = 0;
3847     }
3848     return PyBool_FromLong(cased);
3849 }
3850
3851
3852 PyDoc_STRVAR(splitlines__doc__,
3853 "S.splitlines([keepends]) -> list of strings\n\
3854 \n\
3855 Return a list of the lines in S, breaking at line boundaries.\n\
3856 Line breaks are not included in the resulting list unless keepends\n\
3857 is given and true.");
3858
3859 static PyObject*
3860 string_splitlines(PyStringObject *self, PyObject *args)
3861 {
3862     register Py_ssize_t i;
3863     register Py_ssize_t j;
3864     Py_ssize_t len;
3865     int keepends = 0;
3866     PyObject *list;
3867     PyObject *str;
3868     char *data;
3869
3870     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3871         return NULL;
3872
3873     data = PyString_AS_STRING(self);
3874     len = PyString_GET_SIZE(self);
3875
3876     /* This does not use the preallocated list because splitlines is
3877        usually run with hundreds of newlines.  The overhead of
3878        switching between PyList_SET_ITEM and append causes about a
3879        2-3% slowdown for that common case.  A smarter implementation
3880        could move the if check out, so the SET_ITEMs are done first
3881        and the appends only done when the prealloc buffer is full.
3882        That's too much work for little gain.*/
3883
3884     list = PyList_New(0);
3885     if (!list)
3886         goto onError;
3887
3888     for (i = j = 0; i < len; ) {
3889         Py_ssize_t eol;
3890
3891         /* Find a line and append it */
3892         while (i < len && data[i] != '\n' && data[i] != '\r')
3893             i++;
3894
3895         /* Skip the line break reading CRLF as one line break */
3896         eol = i;
3897         if (i < len) {
3898             if (data[i] == '\r' && i + 1 < len &&
3899                 data[i+1] == '\n')
3900                 i += 2;
3901             else
3902                 i++;
3903             if (keepends)
3904                 eol = i;
3905         }
3906         SPLIT_APPEND(data, j, eol);
3907         j = i;
3908     }
3909     if (j < len) {
3910         SPLIT_APPEND(data, j, len);
3911     }
3912
3913     return list;
3914
3915  onError:
3916     Py_XDECREF(list);
3917     return NULL;
3918 }
3919
3920 PyDoc_STRVAR(sizeof__doc__,
3921 "S.__sizeof__() -> size of S in memory, in bytes");
3922
3923 static PyObject *
3924 string_sizeof(PyStringObject *v)
3925 {
3926         Py_ssize_t res;
3927         res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3928         return PyInt_FromSsize_t(res);
3929 }
3930
3931 #undef SPLIT_APPEND
3932 #undef SPLIT_ADD
3933 #undef MAX_PREALLOC
3934 #undef PREALLOC_SIZE
3935
3936 static PyObject *
3937 string_getnewargs(PyStringObject *v)
3938 {
3939         return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3940 }
3941
3942
3943 #include "stringlib/string_format.h"
3944
3945 PyDoc_STRVAR(format__doc__,
3946 "S.format(*args, **kwargs) -> unicode\n\
3947 \n\
3948 ");
3949
3950 static PyObject *
3951 string__format__(PyObject* self, PyObject* args)
3952 {
3953     PyObject *format_spec;
3954     PyObject *result = NULL;
3955     PyObject *tmp = NULL;
3956
3957     /* If 2.x, convert format_spec to the same type as value */
3958     /* This is to allow things like u''.format('') */
3959     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3960         goto done;
3961     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3962         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3963                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3964         goto done;
3965     }
3966     tmp = PyObject_Str(format_spec);
3967     if (tmp == NULL)
3968         goto done;
3969     format_spec = tmp;
3970
3971     result = _PyBytes_FormatAdvanced(self,
3972                                      PyString_AS_STRING(format_spec),
3973                                      PyString_GET_SIZE(format_spec));
3974 done:
3975     Py_XDECREF(tmp);
3976     return result;
3977 }
3978
3979 PyDoc_STRVAR(p_format__doc__,
3980 "S.__format__(format_spec) -> unicode\n\
3981 \n\
3982 ");
3983
3984 \f
3985 static PyMethodDef
3986 string_methods[] = {
3987         /* Counterparts of the obsolete stropmodule functions; except
3988            string.maketrans(). */
3989         {"join", (PyCFunction)string_join, METH_O, join__doc__},
3990         {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3991         {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3992         {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3993         {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3994         {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3995         {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3996         {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3997         {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3998         {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3999         {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4000         {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4001         {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4002          capitalize__doc__},
4003         {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4004         {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4005          endswith__doc__},
4006         {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4007         {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4008         {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4009         {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4010         {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4011         {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4012         {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4013         {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4014         {"rpartition", (PyCFunction)string_rpartition, METH_O,
4015          rpartition__doc__},
4016         {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4017          startswith__doc__},
4018         {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4019         {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4020          swapcase__doc__},
4021         {"translate", (PyCFunction)string_translate, METH_VARARGS,
4022          translate__doc__},
4023         {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4024         {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4025         {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4026         {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4027         {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4028         {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4029         {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4030         {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4031         {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4032         {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4033         {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4034         {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4035          expandtabs__doc__},
4036         {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4037          splitlines__doc__},
4038         {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4039          sizeof__doc__},
4040         {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
4041         {NULL,     NULL}                     /* sentinel */
4042 };
4043
4044 static PyObject *
4045 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
4046
4047 static PyObject *
4048 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4049 {
4050         PyObject *x = NULL;
4051         static char *kwlist[] = {"object", 0};
4052
4053         if (type != &PyString_Type)
4054                 return str_subtype_new(type, args, kwds);
4055         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4056                 return NULL;
4057         if (x == NULL)
4058                 return PyString_FromString("");
4059         return PyObject_Str(x);
4060 }
4061
4062 static PyObject *
4063 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4064 {
4065         PyObject *tmp, *pnew;
4066         Py_ssize_t n;
4067
4068         assert(PyType_IsSubtype(type, &PyString_Type));
4069         tmp = string_new(&PyString_Type, args, kwds);
4070         if (tmp == NULL)
4071                 return NULL;
4072         assert(PyString_CheckExact(tmp));
4073         n = PyString_GET_SIZE(tmp);
4074         pnew = type->tp_alloc(type, n);
4075         if (pnew != NULL) {
4076                 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4077                 ((PyStringObject *)pnew)->ob_shash =
4078                         ((PyStringObject *)tmp)->ob_shash;
4079                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4080         }
4081         Py_DECREF(tmp);
4082         return pnew;
4083 }
4084
4085 static PyObject *
4086 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4087 {
4088         PyErr_SetString(PyExc_TypeError,
4089                         "The basestring type cannot be instantiated");
4090         return NULL;
4091 }
4092
4093 static PyObject *
4094 string_mod(PyObject *v, PyObject *w)
4095 {
4096         if (!PyString_Check(v)) {
4097                 Py_INCREF(Py_NotImplemented);
4098                 return Py_NotImplemented;
4099         }
4100         return PyString_Format(v, w);
4101 }
4102
/* Docstring installed as PyBaseString_Type.tp_doc. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4105
/* Number protocol table referenced by PyString_Type.tp_as_number.  Only
   nb_remainder (the % operator) is filled in, with string_mod; the slots
   listed before it are explicitly zero and the ones after it are left to
   C's zero-initialization of the remaining members. */
static PyNumberMethods string_as_number = {
        0,                      /*nb_add*/
        0,                      /*nb_subtract*/
        0,                      /*nb_multiply*/
        0,                      /*nb_divide*/
        string_mod,             /*nb_remainder*/
};
4113
4114
/* Type object for "basestring".  It derives from PyBaseObject_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), and its tp_new is basestring_new, which
   always raises TypeError.  Every other behavior slot is left at 0.  The
   initializer is positional, so the order of the entries must not change. */
PyTypeObject PyBaseString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "basestring",                           /* tp_name */
        0,                                      /* tp_basicsize */
        0,                                      /* tp_itemsize */
        0,                                      /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        0,                                      /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        0,                                      /* tp_getattro */
        0,                                      /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        basestring_doc,                         /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        0,                                      /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseObject_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        basestring_new,                         /* tp_new */
        0,                                      /* tp_free */
};
4156
/* Docstring installed as PyString_Type.tp_doc. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for "str".  It derives from PyBaseString_Type, is a
   variable-size type (basic size sizeof(PyStringObject), item size
   sizeof(char)), takes its methods from string_methods, is created through
   string_new and released through PyObject_Del.  The initializer is
   positional, so the order of the entries must not change. */
PyTypeObject PyString_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "str",                                  /* tp_name */
        sizeof(PyStringObject),                 /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        string_dealloc,                         /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        string_repr,                            /* tp_repr */
        &string_as_number,                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        &string_as_mapping,                     /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        string_str,                             /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
                Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
                Py_TPFLAGS_HAVE_NEWBUFFER,      /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseString_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        PyObject_Del,                           /* tp_free */
};
4206
4207 void
4208 PyString_Concat(register PyObject **pv, register PyObject *w)
4209 {
4210         /* Replace *pv with *pv + w, always releasing the old *pv.  *pv ends
4211            up NULL if w is NULL or the old value is not a string, else it is
4212            string_concat()'s result.  A NULL *pv on entry is left untouched. */
4213         register PyObject *old = *pv;
4214         if (old == NULL)
4215                 return;
4216         /* Store the new value before the decref: releasing `old' may run
4217            arbitrary code, which must not find a dangling pointer in *pv. */
4218         *pv = (w != NULL && PyString_Check(old)) ?
4219                 string_concat((PyStringObject *) old, w) : NULL;
4220         Py_DECREF(old);
4221 }
4222
4223 void
4224 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4225 {       /* Like PyString_Concat(), but also gives up a reference to w. */
4226         PyString_Concat(pv, w);
4227         if (w != NULL) { Py_DECREF(w); }        /* w is allowed to be NULL */
4228 }
4229
4230
4231 /* The following function breaks the notion that strings are immutable:
4232    it changes the size of a string.  We get away with this only if there
4233    is exactly one reference to the object.  You can also think of it
4234    as creating a new string object and destroying the old one, only
4235    more efficiently.  In any case, don't use this if the string may
4236    already be known to some other part of the code...
4237    If *pv is not a string, is interned, has a reference count other than
4238    1, or newsize is negative, *pv is released and set to NULL, a
4239    bad-internal-call error is raised and -1 is returned.  If there's not
4240    enough memory, the original string is deallocated, *pv is NULL, an "out
4241    of memory" exception is set, and -1 is returned.  On success 0 is
4242    returned, *pv may or may not be the same as on input, and a trailing \0
4243    byte is stored after the newsize bytes (newsize does *not* include it).  */
4244
4245 int
4246 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4247 {
4248         register PyObject *v;
4249         register PyStringObject *sv;
4250         v = *pv;
4251         if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4252             PyString_CHECK_INTERNED(v)) {
4253                 *pv = 0;        /* the caller loses its reference on every failure */
4254                 Py_DECREF(v);
4255                 PyErr_BadInternalCall();
4256                 return -1;
4257         }
4258         /* XXX UNREF/NEWREF interface should be more symmetrical */
4259         _Py_DEC_REFTOTAL;
4260         _Py_ForgetReference(v);         /* paired with the _Py_NewReference() call below */
4261         *pv = (PyObject *)
4262                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4263         if (*pv == NULL) {
4264                 PyObject_Del(v);        /* realloc failed: release the original block */
4265                 PyErr_NoMemory();
4266                 return -1;
4267         }
4268         _Py_NewReference(*pv);
4269         sv = (PyStringObject *) *pv;
4270         Py_SIZE(sv) = newsize;
4271         sv->ob_sval[newsize] = '\0';    /* keep the data NUL-terminated */
4272         sv->ob_shash = -1;      /* invalidate cached hash value */
4273         return 0;
4274 }
4275
4276 /* Helpers for formatstring */
4277
4278 Py_LOCAL_INLINE(PyObject *)
4279 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4280 {
4281         /* Hand out the next format argument and advance *p_argidx; NULL with
4282            TypeError set once the arguments are used up.  A negative arglen
4283            marks args as a single value that is returned as is (no INCREF). */
4284         const Py_ssize_t pos = *p_argidx;
4285         if (pos >= arglen) {
4286                 PyErr_SetString(PyExc_TypeError,
4287                                 "not enough arguments for format string");
4288                 return NULL;
4289         }
4290         *p_argidx = pos + 1;
4291         return (arglen < 0) ? args : PyTuple_GetItem(args, pos);
4292 }
4293
4294 /* Format codes: one flag bit per printf-style flag character
4295  * F_LJUST      '-'
4296  * F_SIGN       '+'
4297  * F_BLANK      ' '
4298  * F_ALT        '#'
4299  * F_ZERO       '0'
4300  */
4301 #define F_LJUST (1<<0)  /* also set when a '*' width argument is negative */
4302 #define F_SIGN  (1<<1)
4303 #define F_BLANK (1<<2)
4304 #define F_ALT   (1<<3)  /* alternate form: base prefix kept for x/X, '#' passed on for floats */
4305 #define F_ZERO  (1<<4)  /* numeric conversions select '0' as the fill character */
4306
4307 Py_LOCAL_INLINE(int)
4308 formatfloat(char *buf, size_t buflen, int flags,
4309             int prec, int type, PyObject *v)
4310 {
4311         /* Format v as a C double into buf (capacity buflen) with conversion
4312            `type' (e, E, f, g or G); of flags only F_ALT is used.  Returns
4313            strlen(buf), or -1 with TypeError/OverflowError set.
4314            fmt = '%#.' + `prec` + `type`
4315            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4316         char fmt[20];
4317         size_t limit;
4318         double x = PyFloat_AsDouble(v);
4319         if (x == -1.0 && PyErr_Occurred()) {
4320                 PyErr_Format(PyExc_TypeError, "float argument required, "
4321                              "not %.200s", Py_TYPE(v)->tp_name);
4322                 return -1;
4323         }
4324         if (prec < 0)
4325                 prec = 6;
4326         /* |x| >= 1e50 would need more integer digits than 'f' budgets for. */
4327         if (type == 'f' && fabs(x)/1e25 >= 1e25)
4328                 type = 'g';
4329         /* Worst case length calc to ensure no buffer overrun:
4330            'e' and 'g' formats (fmt = %#.<prec>e or %#.<prec>g):
4331              buf = '-' + [0-9]*(prec+1) + '.' + 'e+' + (longest exp
4332                 for any double rep.)
4333              len = 1 + (prec + 1) + 1 + 2 + 5 = 10 + prec
4334
4335            'f' formats:
4336              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4337              len = 1 + 50 + 1 + prec = 52 + prec, plus one for prec=0
4338
4339            'e'/'E' used to bypass this check: an oversized precision went
4340            unreported and the output was limited only by buflen.
4341         */
4342         limit = (size_t)(type == 'f' ? 53 : 10) + (size_t)prec;
4343         if (buflen <= limit) {
4344                 PyErr_SetString(PyExc_OverflowError,
4345                         "formatted float is too long (precision too large?)");
4346                 return -1;
4347         }
4348         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4349                       (flags&F_ALT) ? "#" : "",
4350                       prec, type);
4351         PyOS_ascii_formatd(buf, buflen, fmt, x);
4352         return (int)strlen(buf);
4353 }
4354
4355 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4356  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4357  * Python's regular ints.
4358  * Return value:  a new PyString*, or NULL if error (nothing is leaked).
4359  *  .  *pbuf is set to point into it,
4360  *     *plen set to the # of chars following that.
4361  *     Caller must decref it when done using pbuf.
4362  *     The string starting at *pbuf is of the form
4363  *         "-"? ("0x" | "0X")? digit+
4364  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4365  *         set in flags.  The case of hex digits will be correct.
4366  *     There will be at least prec digits, zero-filled on the left if
4367  *         necessary to get that many.
4368  * val          object to be converted
4369  * flags        bitmask of format flags; only F_ALT is looked at
4370  * prec         minimum number of digits; 0-fill on left if needed
4371  * type         a character in [duoxX]; u acts the same as d
4372  *
4373  * CAUTION:  o, x and X conversions on regular ints can never
4374  * produce a '-' sign, but can for Python's unbounded ints.
4375  */
4376 PyObject*
4377 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4378                      char **pbuf, int *plen)
4379 {
4380         PyObject *result = NULL;
4381         char *buf;
4382         Py_ssize_t i, llen;
4383         int sign;       /* 1 if '-', else 0 */
4384         int len;        /* number of characters */
4385         int numdigits;  /* len == numnondigits + numdigits */
4386         int numnondigits = 0;
4387
4388         switch (type) {
4389         case 'd':
4390         case 'u':
4391                 result = Py_TYPE(val)->tp_str(val);
4392                 break;
4393         case 'o':
4394                 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4395                 break;
4396         case 'x':
4397         case 'X':
4398                 numnondigits = 2;
4399                 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4400                 break;
4401         default:
4402                 assert(!"'type' not in [duoxX]");
4403         }
4404         if (!result)
4405                 return NULL;
4406
4407         buf = PyString_AsString(result);
4408         if (!buf) {
4409                 Py_DECREF(result);
4410                 return NULL;
4411         }
4412         /* To modify the string in-place, there can only be one reference. */
4413         if (Py_REFCNT(result) != 1) {
4414                 Py_DECREF(result);      /* we own a reference: don't leak it */
4415                 PyErr_BadInternalCall();
4416                 return NULL;
4417         }
4418         llen = PyString_Size(result);
4419         if (llen > INT_MAX) {
4420                 Py_DECREF(result);      /* same on this error path */
4421                 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4422                 return NULL;
4423         }
4424         len = (int)llen;
4425         if (buf[len-1] == 'L') {
4426                 --len;
4427                 buf[len] = '\0';
4428         }
4429         sign = buf[0] == '-';
4430         numnondigits += sign;
4431         numdigits = len - numnondigits;
4432         assert(numdigits > 0);
4433
4434         /* Get rid of base marker unless F_ALT */
4435         if ((flags & F_ALT) == 0) {
4436                 /* Need to skip 0x, 0X or 0. */
4437                 int skipped = 0;
4438                 switch (type) {
4439                 case 'o':
4440                         assert(buf[sign] == '0');
4441                         /* If 0 is only digit, leave it alone. */
4442                         if (numdigits > 1) {
4443                                 skipped = 1;
4444                                 --numdigits;
4445                         }
4446                         break;
4447                 case 'x':
4448                 case 'X':
4449                         assert(buf[sign] == '0');
4450                         assert(buf[sign + 1] == 'x');
4451                         skipped = 2;
4452                         numnondigits -= 2;
4453                         break;
4454                 }
4455                 if (skipped) {
4456                         buf += skipped;
4457                         len -= skipped;
4458                         if (sign)
4459                                 buf[0] = '-';
4460                 }
4461                 assert(len == numnondigits + numdigits);
4462                 assert(numdigits > 0);
4463         }
4464
4465         /* Fill with leading zeroes to meet minimum width. */
4466         if (prec > numdigits) {
4467                 PyObject *r1 = PyString_FromStringAndSize(NULL,
4468                                         numnondigits + prec);
4469                 char *b1;
4470                 if (!r1) {
4471                         Py_DECREF(result);
4472                         return NULL;
4473                 }
4474                 b1 = PyString_AS_STRING(r1);
4475                 for (i = 0; i < numnondigits; ++i)
4476                         *b1++ = *buf++;
4477                 for (i = 0; i < prec - numdigits; i++)
4478                         *b1++ = '0';
4479                 for (i = 0; i < numdigits; i++)
4480                         *b1++ = *buf++;
4481                 *b1 = '\0';
4482                 Py_DECREF(result);
4483                 result = r1;
4484                 buf = PyString_AS_STRING(result);
4485                 len = numnondigits + prec;
4486         }
4487
4488         /* Fix up case for hex conversions. */
4489         if (type == 'X') {
4490                 /* Need to convert all lower case letters to upper case.
4491                    and need to convert 0x to 0X (and -0x to -0X). */
4492                 for (i = 0; i < len; i++)
4493                         if (buf[i] >= 'a' && buf[i] <= 'x')
4494                                 buf[i] -= 'a'-'A';
4495         }
4496         *pbuf = buf;
4497         *plen = len;
4498         return result;
4499 }
4500
4501 Py_LOCAL_INLINE(int)
4502 formatint(char *buf, size_t buflen, int flags,
4503           int prec, int type, PyObject *v)
4504 {
4505         /* Format the C long value of v into buf for conversion `type' (one of
4506            d, u, o, x, X); returns strlen(buf) or -1 with an exception set.
4507            fmt = '%#.' + `prec` + 'l' + `type`
4508            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4509            + 1 + 1 = 24 */
4510         char fmt[64];   /* plenty big enough! */
4511         const char *sign;
4512         long x = PyInt_AsLong(v);
4513         if (x == -1 && PyErr_Occurred()) {
4514                 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4515                              Py_TYPE(v)->tp_name);
4516                 return -1;
4517         }
4518         if (x < 0 && type == 'u')
4519                 type = 'd';
4520         /* Negative o, x and X values get an explicit '-' put into fmt. */
4521         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4522                 sign = "-";
4523         else
4524                 sign = "";
4525         if (prec < 0)
4526                 prec = 1;
4527
4528         if ((flags & F_ALT) &&
4529             (type == 'x' || type == 'X')) {
4530                 /* When converting under %#x or %#X, there are a number
4531                  * of issues that cause pain:
4532                  * - when 0 is being converted, the C standard leaves off
4533                  *   the '0x' or '0X', which is inconsistent with other
4534                  *   %#x/%#X conversions and inconsistent with Python's
4535                  *   hex() function
4536                  * - there are platforms that violate the standard and
4537                  *   convert 0 with the '0x' or '0X'
4538                  *   (Metrowerks, Compaq Tru64)
4539                  * - there are platforms that give '0x' when converting
4540                  *   under %#X, but convert 0 in accordance with the
4541                  *   standard (OS/2 EMX)
4542                  *
4543                  * We can achieve the desired consistency by inserting our
4544                  * own '0x' or '0X' prefix, and substituting %x/%X in place
4545                  * of %#x/%#X.
4546                  *
4547                  * Note that this is the same approach as used in
4548                  * formatint() in unicodeobject.c
4549                  */
4550                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4551                               sign, type, prec, type);
4552         }
4553         else {
4554                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4555                               sign, (flags&F_ALT) ? "#" : "",
4556                               prec, type);
4557         }
4558         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal));
4559          * a long has at most (bits + 2) / 3 octal digits (11 for 32 bits). */
4560         if (buflen <= (size_t)3 + (sizeof(long) * CHAR_BIT + 2) / 3 ||
4561             buflen <= (size_t)3 + (size_t)prec) {
4562                 PyErr_SetString(PyExc_OverflowError,
4563                     "formatted integer is too long (precision too large?)");
4564                 return -1;
4565         }
4566         /* Negate as unsigned: plain -x is undefined for LONG_MIN. */
4567         if (sign[0])
4568                 PyOS_snprintf(buf, buflen, fmt, 0UL - (unsigned long)x);
4569         else
4570                 PyOS_snprintf(buf, buflen, fmt, x);
4571         return (int)strlen(buf);
4572 }
4573
4574 Py_LOCAL_INLINE(int)
4575 formatchar(char *buf, size_t buflen, PyObject *v)
4576 {
4577         /* Store the single character for a %c conversion in buf[0] and
4578            NUL-terminate it.  Strings are parsed with "c", anything else
4579            with "b".  The caller must supply at least 2 bytes; buflen is
4580            not consulted.  Returns 1, or -1 if parsing failed. */
4581         const char *spec = PyString_Check(v)
4582                 ? "c;%c requires int or char"
4583                 : "b;%c requires int or char";
4584         if (!PyArg_Parse(v, spec, buf))
4585                 return -1;
4586         buf[1] = '\0';
4587         return 1;
4588 }
4589
4590 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4591
4592    FORMATBUFLEN is the length of the local buffer (formatbuf in
4593    PyString_Format) in which the floats, ints, & chars are formatted.
4594    XXX This is a magic number. Each formatting routine does bounds
4595    checking to ensure no overflow, but a better solution may be to malloc
4596    a buffer of appropriate size for each format. For now, this suffices.
4597 */
4598 #define FORMATBUFLEN (size_t)120
4599
4600 PyObject *
4601 PyString_Format(PyObject *format, PyObject *args)
4602 {
4603         char *fmt, *res;
4604         Py_ssize_t arglen, argidx;
4605         Py_ssize_t reslen, rescnt, fmtcnt;
4606         int args_owned = 0;
4607         PyObject *result, *orig_args;
4608 #ifdef Py_USING_UNICODE
4609         PyObject *v, *w;
4610 #endif
4611         PyObject *dict = NULL;
4612         if (format == NULL || !PyString_Check(format) || args == NULL) {
4613                 PyErr_BadInternalCall();
4614                 return NULL;
4615         }
4616         orig_args = args;
4617         fmt = PyString_AS_STRING(format);
4618         fmtcnt = PyString_GET_SIZE(format);
4619         reslen = rescnt = fmtcnt + 100;
4620         result = PyString_FromStringAndSize((char *)NULL, reslen);
4621         if (result == NULL)
4622                 return NULL;
4623         res = PyString_AsString(result);
4624         if (PyTuple_Check(args)) {
4625                 arglen = PyTuple_GET_SIZE(args);
4626                 argidx = 0;
4627         }
4628         else {
4629                 arglen = -1;
4630                 argidx = -2;
4631         }
4632         if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4633             !PyObject_TypeCheck(args, &PyBaseString_Type))
4634                 dict = args;
4635         while (--fmtcnt >= 0) {
4636                 if (*fmt != '%') {
4637                         if (--rescnt < 0) {
4638                                 rescnt = fmtcnt + 100;
4639                                 reslen += rescnt;
4640                                 if (_PyString_Resize(&result, reslen) < 0)
4641                                         return NULL;
4642                                 res = PyString_AS_STRING(result)
4643                                         + reslen - rescnt;
4644                                 --rescnt;
4645                         }
4646                         *res++ = *fmt++;
4647                 }
4648                 else {
4649                         /* Got a format specifier */
4650                         int flags = 0;
4651                         Py_ssize_t width = -1;
4652                         int prec = -1;
4653                         int c = '\0';
4654                         int fill;
4655                         int isnumok;
4656                         PyObject *v = NULL;
4657                         PyObject *temp = NULL;
4658                         char *pbuf;
4659                         int sign;
4660                         Py_ssize_t len;
4661                         char formatbuf[FORMATBUFLEN];
4662                              /* For format{float,int,char}() */
4663 #ifdef Py_USING_UNICODE
4664                         char *fmt_start = fmt;
4665                         Py_ssize_t argidx_start = argidx;
4666 #endif
4667
4668                         fmt++;
4669                         if (*fmt == '(') {
4670                                 char *keystart;
4671                                 Py_ssize_t keylen;
4672                                 PyObject *key;
4673                                 int pcount = 1;
4674
4675                                 if (dict == NULL) {
4676                                         PyErr_SetString(PyExc_TypeError,
4677                                                  "format requires a mapping");
4678                                         goto error;
4679                                 }
4680                                 ++fmt;
4681                                 --fmtcnt;
4682                                 keystart = fmt;
4683                                 /* Skip over balanced parentheses */
4684                                 while (pcount > 0 && --fmtcnt >= 0) {
4685                                         if (*fmt == ')')
4686                                                 --pcount;
4687                                         else if (*fmt == '(')
4688                                                 ++pcount;
4689                                         fmt++;
4690                                 }
4691                                 keylen = fmt - keystart - 1;
4692                                 if (fmtcnt < 0 || pcount > 0) {
4693                                         PyErr_SetString(PyExc_ValueError,
4694                                                    "incomplete format key");
4695                                         goto error;
4696                                 }
4697                                 key = PyString_FromStringAndSize(keystart,
4698                                                                  keylen);
4699                                 if (key == NULL)
4700                                         goto error;
4701                                 if (args_owned) {
4702                                         Py_DECREF(args);
4703                                         args_owned = 0;
4704                                 }
4705                                 args = PyObject_GetItem(dict, key);
4706                                 Py_DECREF(key);
4707                                 if (args == NULL) {
4708                                         goto error;
4709                                 }
4710                                 args_owned = 1;
4711                                 arglen = -1;
4712                                 argidx = -2;
4713                         }
4714                         while (--fmtcnt >= 0) {
4715                                 switch (c = *fmt++) {
4716                                 case '-': flags |= F_LJUST; continue;
4717                                 case '+': flags |= F_SIGN; continue;
4718                                 case ' ': flags |= F_BLANK; continue;
4719                                 case '#': flags |= F_ALT; continue;
4720                                 case '0': flags |= F_ZERO; continue;
4721                                 }
4722                                 break;
4723                         }
4724                         if (c == '*') {
4725                                 v = getnextarg(args, arglen, &argidx);
4726                                 if (v == NULL)
4727                                         goto error;
4728                                 if (!PyInt_Check(v)) {
4729                                         PyErr_SetString(PyExc_TypeError,
4730                                                         "* wants int");
4731                                         goto error;
4732                                 }
4733                                 width = PyInt_AsLong(v);
4734                                 if (width < 0) {
4735                                         flags |= F_LJUST;
4736                                         width = -width;
4737                                 }
4738                                 if (--fmtcnt >= 0)
4739                                         c = *fmt++;
4740                         }
4741                         else if (c >= 0 && isdigit(c)) {
4742                                 width = c - '0';
4743                                 while (--fmtcnt >= 0) {
4744                                         c = Py_CHARMASK(*fmt++);
4745                                         if (!isdigit(c))
4746                                                 break;
4747                                         if ((width*10) / 10 != width) {
4748                                                 PyErr_SetString(
4749                                                         PyExc_ValueError,
4750                                                         "width too big");
4751                                                 goto error;
4752                                         }
4753                                         width = width*10 + (c - '0');
4754                                 }
4755                         }
4756                         if (c == '.') {
4757                                 prec = 0;
4758                                 if (--fmtcnt >= 0)
4759                                         c = *fmt++;
4760                                 if (c == '*') {
4761                                         v = getnextarg(args, arglen, &argidx);
4762                                         if (v == NULL)
4763                                                 goto error;
4764                                         if (!PyInt_Check(v)) {
4765                                                 PyErr_SetString(
4766                                                         PyExc_TypeError,
4767                                                         "* wants int");
4768                                                 goto error;
4769                                         }
4770                                         prec = PyInt_AsLong(v);
4771                                         if (prec < 0)
4772                                                 prec = 0;
4773                                         if (--fmtcnt >= 0)
4774                                                 c = *fmt++;
4775                                 }
4776                                 else if (c >= 0 && isdigit(c)) {
4777                                         prec = c - '0';
4778                                         while (--fmtcnt >= 0) {
4779                                                 c = Py_CHARMASK(*fmt++);
4780                                                 if (!isdigit(c))
4781                                                         break;
4782                                                 if ((prec*10) / 10 != prec) {
4783                                                         PyErr_SetString(
4784                                                             PyExc_ValueError,
4785                                                             "prec too big");
4786                                                         goto error;
4787                                                 }
4788                                                 prec = prec*10 + (c - '0');
4789                                         }
4790                                 }
4791                         } /* prec */
4792                         if (fmtcnt >= 0) {
4793                                 if (c == 'h' || c == 'l' || c == 'L') {
4794                                         if (--fmtcnt >= 0)
4795                                                 c = *fmt++;
4796                                 }
4797                         }
4798                         if (fmtcnt < 0) {
4799                                 PyErr_SetString(PyExc_ValueError,
4800                                                 "incomplete format");
4801                                 goto error;
4802                         }
4803                         if (c != '%') {
4804                                 v = getnextarg(args, arglen, &argidx);
4805                                 if (v == NULL)
4806                                         goto error;
4807                         }
4808                         sign = 0;
4809                         fill = ' ';
4810                         switch (c) {
4811                         case '%':
4812                                 pbuf = "%";
4813                                 len = 1;
4814                                 break;
4815                         case 's':
4816 #ifdef Py_USING_UNICODE
4817                                 if (PyUnicode_Check(v)) {
4818                                         fmt = fmt_start;
4819                                         argidx = argidx_start;
4820                                         goto unicode;
4821                                 }
4822 #endif
4823                                 temp = _PyObject_Str(v);
4824 #ifdef Py_USING_UNICODE
4825                                 if (temp != NULL && PyUnicode_Check(temp)) {
4826                                         Py_DECREF(temp);
4827                                         fmt = fmt_start;
4828                                         argidx = argidx_start;
4829                                         goto unicode;
4830                                 }
4831 #endif
4832                                 /* Fall through */
4833                         case 'r':
4834                                 if (c == 'r')
4835                                         temp = PyObject_Repr(v);
4836                                 if (temp == NULL)
4837                                         goto error;
4838                                 if (!PyString_Check(temp)) {
4839                                         PyErr_SetString(PyExc_TypeError,
4840                                           "%s argument has non-string str()");
4841                                         Py_DECREF(temp);
4842                                         goto error;
4843                                 }
4844                                 pbuf = PyString_AS_STRING(temp);
4845                                 len = PyString_GET_SIZE(temp);
4846                                 if (prec >= 0 && len > prec)
4847                                         len = prec;
4848                                 break;
4849                         case 'i':
4850                         case 'd':
4851                         case 'u':
4852                         case 'o':
4853                         case 'x':
4854                         case 'X':
4855                                 if (c == 'i')
4856                                         c = 'd';
4857                                 isnumok = 0;
4858                                 if (PyNumber_Check(v)) {
4859                                         PyObject *iobj=NULL;
4860
4861                                         if (PyInt_Check(v) || (PyLong_Check(v))) {
4862                                                 iobj = v;
4863                                                 Py_INCREF(iobj);
4864                                         }
4865                                         else {
4866                                                 iobj = PyNumber_Int(v);
4867                                                 if (iobj==NULL) iobj = PyNumber_Long(v);
4868                                         }
4869                                         if (iobj!=NULL) {
4870                                                 if (PyInt_Check(iobj)) {
4871                                                         isnumok = 1;
4872                                                         pbuf = formatbuf;
4873                                                         len = formatint(pbuf,
4874                                                                         sizeof(formatbuf),
4875                                                                         flags, prec, c, iobj);
4876                                                         Py_DECREF(iobj);
4877                                                         if (len < 0)
4878                                                                 goto error;
4879                                                         sign = 1;
4880                                                 }
4881                                                 else if (PyLong_Check(iobj)) {
4882                                                         int ilen;
4883
4884                                                         isnumok = 1;
4885                                                         temp = _PyString_FormatLong(iobj, flags,
4886                                                                 prec, c, &pbuf, &ilen);
4887                                                         Py_DECREF(iobj);
4888                                                         len = ilen;
4889                                                         if (!temp)
4890                                                                 goto error;
4891                                                         sign = 1;
4892                                                 }
4893                                                 else {
4894                                                         Py_DECREF(iobj);
4895                                                 }
4896                                         }
4897                                 }
4898                                 if (!isnumok) {
4899                                         PyErr_Format(PyExc_TypeError,
4900                                             "%%%c format: a number is required, "
4901                                             "not %.200s", c, Py_TYPE(v)->tp_name);
4902                                         goto error;
4903                                 }
4904                                 if (flags & F_ZERO)
4905                                         fill = '0';
4906                                 break;
4907                         case 'e':
4908                         case 'E':
4909                         case 'f':
4910                         case 'F':
4911                         case 'g':
4912                         case 'G':
4913                                 if (c == 'F')
4914                                         c = 'f';
4915                                 pbuf = formatbuf;
4916                                 len = formatfloat(pbuf, sizeof(formatbuf),
4917                                                   flags, prec, c, v);
4918                                 if (len < 0)
4919                                         goto error;
4920                                 sign = 1;
4921                                 if (flags & F_ZERO)
4922                                         fill = '0';
4923                                 break;
4924                         case 'c':
4925 #ifdef Py_USING_UNICODE
4926                                 if (PyUnicode_Check(v)) {
4927                                         fmt = fmt_start;
4928                                         argidx = argidx_start;
4929                                         goto unicode;
4930                                 }
4931 #endif
4932                                 pbuf = formatbuf;
4933                                 len = formatchar(pbuf, sizeof(formatbuf), v);
4934                                 if (len < 0)
4935                                         goto error;
4936                                 break;
4937                         default:
4938                                 PyErr_Format(PyExc_ValueError,
4939                                   "unsupported format character '%c' (0x%x) "
4940                                   "at index %zd",
4941                                   c, c,
4942                                   (Py_ssize_t)(fmt - 1 -
4943                                                PyString_AsString(format)));
4944                                 goto error;
4945                         }
4946                         if (sign) {
4947                                 if (*pbuf == '-' || *pbuf == '+') {
4948                                         sign = *pbuf++;
4949                                         len--;
4950                                 }
4951                                 else if (flags & F_SIGN)
4952                                         sign = '+';
4953                                 else if (flags & F_BLANK)
4954                                         sign = ' ';
4955                                 else
4956                                         sign = 0;
4957                         }
4958                         if (width < len)
4959                                 width = len;
4960                         if (rescnt - (sign != 0) < width) {
4961                                 reslen -= rescnt;
4962                                 rescnt = width + fmtcnt + 100;
4963                                 reslen += rescnt;
4964                                 if (reslen < 0) {
4965                                         Py_DECREF(result);
4966                                         Py_XDECREF(temp);
4967                                         return PyErr_NoMemory();
4968                                 }
4969                                 if (_PyString_Resize(&result, reslen) < 0) {
4970                                         Py_XDECREF(temp);
4971                                         return NULL;
4972                                 }
4973                                 res = PyString_AS_STRING(result)
4974                                         + reslen - rescnt;
4975                         }
4976                         if (sign) {
4977                                 if (fill != ' ')
4978                                         *res++ = sign;
4979                                 rescnt--;
4980                                 if (width > len)
4981                                         width--;
4982                         }
4983                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4984                                 assert(pbuf[0] == '0');
4985                                 assert(pbuf[1] == c);
4986                                 if (fill != ' ') {
4987                                         *res++ = *pbuf++;
4988                                         *res++ = *pbuf++;
4989                                 }
4990                                 rescnt -= 2;
4991                                 width -= 2;
4992                                 if (width < 0)
4993                                         width = 0;
4994                                 len -= 2;
4995                         }
4996                         if (width > len && !(flags & F_LJUST)) {
4997                                 do {
4998                                         --rescnt;
4999                                         *res++ = fill;
5000                                 } while (--width > len);
5001                         }
5002                         if (fill == ' ') {
5003                                 if (sign)
5004                                         *res++ = sign;
5005                                 if ((flags & F_ALT) &&
5006                                     (c == 'x' || c == 'X')) {
5007                                         assert(pbuf[0] == '0');
5008                                         assert(pbuf[1] == c);
5009                                         *res++ = *pbuf++;
5010                                         *res++ = *pbuf++;
5011                                 }
5012                         }
5013                         Py_MEMCPY(res, pbuf, len);
5014                         res += len;
5015                         rescnt -= len;
5016                         while (--width >= len) {
5017                                 --rescnt;
5018                                 *res++ = ' ';
5019                         }
5020                         if (dict && (argidx < arglen) && c != '%') {
5021                                 PyErr_SetString(PyExc_TypeError,
5022                                            "not all arguments converted during string formatting");
5023                                 Py_XDECREF(temp);
5024                                 goto error;
5025                         }
5026                         Py_XDECREF(temp);
5027                 } /* '%' */
5028         } /* until end */
5029         if (argidx < arglen && !dict) {
5030                 PyErr_SetString(PyExc_TypeError,
5031                                 "not all arguments converted during string formatting");
5032                 goto error;
5033         }
5034         if (args_owned) {
5035                 Py_DECREF(args);
5036         }
5037         _PyString_Resize(&result, reslen - rescnt);
5038         return result;
5039
5040 #ifdef Py_USING_UNICODE
5041  unicode:
5042         if (args_owned) {
5043                 Py_DECREF(args);
5044                 args_owned = 0;
5045         }
5046         /* Fiddle args right (remove the first argidx arguments) */
5047         if (PyTuple_Check(orig_args) && argidx > 0) {
5048                 PyObject *v;
5049                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5050                 v = PyTuple_New(n);
5051                 if (v == NULL)
5052                         goto error;
5053                 while (--n >= 0) {
5054                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5055                         Py_INCREF(w);
5056                         PyTuple_SET_ITEM(v, n, w);
5057                 }
5058                 args = v;
5059         } else {
5060                 Py_INCREF(orig_args);
5061                 args = orig_args;
5062         }
5063         args_owned = 1;
5064         /* Take what we have of the result and let the Unicode formatting
5065            function format the rest of the input. */
5066         rescnt = res - PyString_AS_STRING(result);
5067         if (_PyString_Resize(&result, rescnt))
5068                 goto error;
5069         fmtcnt = PyString_GET_SIZE(format) - \
5070                  (fmt - PyString_AS_STRING(format));
5071         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5072         if (format == NULL)
5073                 goto error;
5074         v = PyUnicode_Format(format, args);
5075         Py_DECREF(format);
5076         if (v == NULL)
5077                 goto error;
5078         /* Paste what we have (result) to what the Unicode formatting
5079            function returned (v) and return the result (or error) */
5080         w = PyUnicode_Concat(result, v);
5081         Py_DECREF(result);
5082         Py_DECREF(v);
5083         Py_DECREF(args);
5084         return w;
5085 #endif /* Py_USING_UNICODE */
5086
5087  error:
5088         Py_DECREF(result);
5089         if (args_owned) {
5090                 Py_DECREF(args);
5091         }
5092         return NULL;
5093 }
5094
5095 void
5096 PyString_InternInPlace(PyObject **p)
5097 {
5098         register PyStringObject *s = (PyStringObject *)(*p);
5099         PyObject *t;
5100         if (s == NULL || !PyString_Check(s))
5101                 Py_FatalError("PyString_InternInPlace: strings only please!");
5102         /* If it's a string subclass, we don't really know what putting
5103            it in the interned dict might do. */
5104         if (!PyString_CheckExact(s))
5105                 return;
5106         if (PyString_CHECK_INTERNED(s))
5107                 return;
5108         if (interned == NULL) {
5109                 interned = PyDict_New();
5110                 if (interned == NULL) {
5111                         PyErr_Clear(); /* Don't leave an exception */
5112                         return;
5113                 }
5114         }
5115         t = PyDict_GetItem(interned, (PyObject *)s);
5116         if (t) {
5117                 Py_INCREF(t);
5118                 Py_DECREF(*p);
5119                 *p = t;
5120                 return;
5121         }
5122
5123         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5124                 PyErr_Clear();
5125                 return;
5126         }
5127         /* The two references in interned are not counted by refcnt.
5128            The string deallocator will take care of this */
5129         Py_REFCNT(s) -= 2;
5130         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
5131 }
5132
5133 void
5134 PyString_InternImmortal(PyObject **p)
5135 {
5136         PyString_InternInPlace(p);
5137         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5138                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5139                 Py_INCREF(*p);
5140         }
5141 }
5142
5143
5144 PyObject *
5145 PyString_InternFromString(const char *cp)
5146 {
5147         PyObject *s = PyString_FromString(cp);
5148         if (s == NULL)
5149                 return NULL;
5150         PyString_InternInPlace(&s);
5151         return s;
5152 }
5153
5154 void
5155 PyString_Fini(void)
5156 {
5157         int i;
5158         for (i = 0; i < UCHAR_MAX + 1; i++) {
5159                 Py_XDECREF(characters[i]);
5160                 characters[i] = NULL;
5161         }
5162         Py_XDECREF(nullstring);
5163         nullstring = NULL;
5164 }
5165
5166 void _Py_ReleaseInternedStrings(void)
5167 {
5168         PyObject *keys;
5169         PyStringObject *s;
5170         Py_ssize_t i, n;
5171         Py_ssize_t immortal_size = 0, mortal_size = 0;
5172
5173         if (interned == NULL || !PyDict_Check(interned))
5174                 return;
5175         keys = PyDict_Keys(interned);
5176         if (keys == NULL || !PyList_Check(keys)) {
5177                 PyErr_Clear();
5178                 return;
5179         }
5180
5181         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5182            detector, interned strings are not forcibly deallocated; rather, we
5183            give them their stolen references back, and then clear and DECREF
5184            the interned dict. */
5185
5186         n = PyList_GET_SIZE(keys);
5187         fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5188                 n);
5189         for (i = 0; i < n; i++) {
5190                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5191                 switch (s->ob_sstate) {
5192                 case SSTATE_NOT_INTERNED:
5193                         /* XXX Shouldn't happen */
5194                         break;
5195                 case SSTATE_INTERNED_IMMORTAL:
5196                         Py_REFCNT(s) += 1;
5197                         immortal_size += Py_SIZE(s);
5198                         break;
5199                 case SSTATE_INTERNED_MORTAL:
5200                         Py_REFCNT(s) += 2;
5201                         mortal_size += Py_SIZE(s);
5202                         break;
5203                 default:
5204                         Py_FatalError("Inconsistent interned string state.");
5205                 }
5206                 s->ob_sstate = SSTATE_NOT_INTERNED;
5207         }
5208         fprintf(stderr, "total size of all interned strings: "
5209                         "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5210                         "mortal/immortal\n", mortal_size, immortal_size);
5211         Py_DECREF(keys);
5212         PyDict_Clear(interned);
5213         Py_DECREF(interned);
5214         interned = NULL;
5215 }