Objects/stringlib/string_format.h

   1 /*
   2     string_format.h -- implementation of string.format().
   3
   4     It uses the Objects/stringlib conventions, so that it can be
   5     compiled for both unicode and string objects.
   6 */
   7
   8
   9 /* Defines for Python 2.6 compatability */
  10 #if PY_VERSION_HEX < 0x03000000
  11 #define PyLong_FromSsize_t _PyLong_FromSsize_t
  12 #endif
  13
  14 /* Defines for more efficiently reallocating the string buffer */
  15 #define INITIAL_SIZE_INCREMENT 100
  16 #define SIZE_MULTIPLIER 2
  17 #define MAX_SIZE_INCREMENT  3200
  18
  19
  20 /************************************************************************/
  21 /***********   Global data structures and forward declarations  *********/
  22 /************************************************************************/
  23
  24 /*
  25    A SubString consists of the characters between two string or
  26    unicode pointers.
  27 */
  28 typedef struct {
  29     STRINGLIB_CHAR *ptr;
  30     STRINGLIB_CHAR *end;
  31 } SubString;
  32
  33
  34 /* forward declaration for recursion */
  35 static PyObject *
  36 build_string(SubString *input, PyObject *args, PyObject *kwargs,
  37              int recursion_depth);
  38
  39
  40
  41 /************************************************************************/
  42 /**************************  Utility  functions  ************************/
  43 /************************************************************************/
  44
  45 /* fill in a SubString from a pointer and length */
  46 Py_LOCAL_INLINE(void)
  47 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
  48 {
  49     str->ptr = p;
  50     if (p == NULL)
  51         str->end = NULL;
  52     else
  53         str->end = str->ptr + len;
  54 }
  55
  56 /* return a new string.  if str->ptr is NULL, return None */
  57 Py_LOCAL_INLINE(PyObject *)
  58 SubString_new_object(SubString *str)
  59 {
  60     if (str->ptr == NULL) {
  61         Py_INCREF(Py_None);
  62         return Py_None;
  63     }
  64     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  65 }
  66
  67 /* return a new string.  if str->ptr is NULL, return None */
  68 Py_LOCAL_INLINE(PyObject *)
  69 SubString_new_object_or_empty(SubString *str)
  70 {
  71     if (str->ptr == NULL) {
  72         return STRINGLIB_NEW(NULL, 0);
  73     }
  74     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
  75 }
  76
  77 /************************************************************************/
  78 /***********    Output string management functions       ****************/
  79 /************************************************************************/
  80
  81 typedef struct {
  82     STRINGLIB_CHAR *ptr;
  83     STRINGLIB_CHAR *end;
  84     PyObject *obj;
  85     Py_ssize_t size_increment;
  86 } OutputString;
  87
  88 /* initialize an OutputString object, reserving size characters */
  89 static int
  90 output_initialize(OutputString *output, Py_ssize_t size)
  91 {
  92     output->obj = STRINGLIB_NEW(NULL, size);
  93     if (output->obj == NULL)
  94         return 0;
  95
  96     output->ptr = STRINGLIB_STR(output->obj);
  97     output->end = STRINGLIB_LEN(output->obj) + output->ptr;
  98     output->size_increment = INITIAL_SIZE_INCREMENT;
  99
 100     return 1;
 101 }
 102
 103 /*
 104     output_extend reallocates the output string buffer.
 105     It returns a status:  0 for a failed reallocation,
 106     1 for success.
 107 */
 108
 109 static int
 110 output_extend(OutputString *output, Py_ssize_t count)
 111 {
 112     STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
 113     Py_ssize_t curlen = output->ptr - startptr;
 114     Py_ssize_t maxlen = curlen + count + output->size_increment;
 115
 116     if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
 117         return 0;
 118     startptr = STRINGLIB_STR(output->obj);
 119     output->ptr = startptr + curlen;
 120     output->end = startptr + maxlen;
 121     if (output->size_increment < MAX_SIZE_INCREMENT)
 122         output->size_increment *= SIZE_MULTIPLIER;
 123     return 1;
 124 }
 125
 126 /*
 127     output_data dumps characters into our output string
 128     buffer.
 129
 130     In some cases, it has to reallocate the string.
 131
 132     It returns a status:  0 for a failed reallocation,
 133     1 for success.
 134 */
 135 static int
 136 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
 137 {
 138     if ((count > output->end - output->ptr) && !output_extend(output, count))
 139         return 0;
 140     memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
 141     output->ptr += count;
 142     return 1;
 143 }
 144
 145 /************************************************************************/
 146 /***********  Format string parsing -- integers and identifiers *********/
 147 /************************************************************************/
 148
 149 static Py_ssize_t
 150 get_integer(const SubString *str)
 151 {
 152     Py_ssize_t accumulator = 0;
 153     Py_ssize_t digitval;
 154     Py_ssize_t oldaccumulator;
 155     STRINGLIB_CHAR *p;
 156
 157     /* empty string is an error */
 158     if (str->ptr >= str->end)
 159         return -1;
 160
 161     for (p = str->ptr; p < str->end; p++) {
 162         digitval = STRINGLIB_TODECIMAL(*p);
 163         if (digitval < 0)
 164             return -1;
 165         /*
 166            This trick was copied from old Unicode format code.  It's cute,
 167            but would really suck on an old machine with a slow divide
 168            implementation.  Fortunately, in the normal case we do not
 169            expect too many digits.
 170         */
 171         oldaccumulator = accumulator;
 172         accumulator *= 10;
 173         if ((accumulator+10)/10 != oldaccumulator+1) {
 174             PyErr_Format(PyExc_ValueError,
 175                          "Too many decimal digits in format string");
 176             return -1;
 177         }
 178         accumulator += digitval;
 179     }
 180     return accumulator;
 181 }
 182
 183 /************************************************************************/
 184 /******** Functions to get field objects and specification strings ******/
 185 /************************************************************************/
 186
 187 /* do the equivalent of obj.name */
 188 static PyObject *
 189 getattr(PyObject *obj, SubString *name)
 190 {
 191     PyObject *newobj;
 192     PyObject *str = SubString_new_object(name);
 193     if (str == NULL)
 194         return NULL;
 195     newobj = PyObject_GetAttr(obj, str);
 196     Py_DECREF(str);
 197     return newobj;
 198 }
 199
 200 /* do the equivalent of obj[idx], where obj is a sequence */
 201 static PyObject *
 202 getitem_sequence(PyObject *obj, Py_ssize_t idx)
 203 {
 204     return PySequence_GetItem(obj, idx);
 205 }
 206
 207 /* do the equivalent of obj[idx], where obj is not a sequence */
 208 static PyObject *
 209 getitem_idx(PyObject *obj, Py_ssize_t idx)
 210 {
 211     PyObject *newobj;
 212     PyObject *idx_obj = PyLong_FromSsize_t(idx);
 213     if (idx_obj == NULL)
 214         return NULL;
 215     newobj = PyObject_GetItem(obj, idx_obj);
 216     Py_DECREF(idx_obj);
 217     return newobj;
 218 }
 219
 220 /* do the equivalent of obj[name] */
 221 static PyObject *
 222 getitem_str(PyObject *obj, SubString *name)
 223 {
 224     PyObject *newobj;
 225     PyObject *str = SubString_new_object(name);
 226     if (str == NULL)
 227         return NULL;
 228     newobj = PyObject_GetItem(obj, str);
 229     Py_DECREF(str);
 230     return newobj;
 231 }
 232
 233 typedef struct {
 234     /* the entire string we're parsing.  we assume that someone else
 235        is managing its lifetime, and that it will exist for the
 236        lifetime of the iterator.  can be empty */
 237     SubString str;
 238
 239     /* pointer to where we are inside field_name */
 240     STRINGLIB_CHAR *ptr;
 241 } FieldNameIterator;
 242
 243
 244 static int
 245 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
 246                        Py_ssize_t len)
 247 {
 248     SubString_init(&self->str, ptr, len);
 249     self->ptr = self->str.ptr;
 250     return 1;
 251 }
 252
 253 static int
 254 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
 255 {
 256     STRINGLIB_CHAR c;
 257
 258     name->ptr = self->ptr;
 259
 260     /* return everything until '.' or '[' */
 261     while (self->ptr < self->str.end) {
 262         switch (c = *self->ptr++) {
 263         case '[':
 264         case '.':
 265             /* backup so that we this character will be seen next time */
 266             self->ptr--;
 267             break;
 268         default:
 269             continue;
 270         }
 271         break;
 272     }
 273     /* end of string is okay */
 274     name->end = self->ptr;
 275     return 1;
 276 }
 277
 278 static int
 279 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
 280 {
 281     int bracket_seen = 0;
 282     STRINGLIB_CHAR c;
 283
 284     name->ptr = self->ptr;
 285
 286     /* return everything until ']' */
 287     while (self->ptr < self->str.end) {
 288         switch (c = *self->ptr++) {
 289         case ']':
 290             bracket_seen = 1;
 291             break;
 292         default:
 293             continue;
 294         }
 295         break;
 296     }
 297     /* make sure we ended with a ']' */
 298     if (!bracket_seen) {
 299         PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
 300         return 0;
 301     }
 302
 303     /* end of string is okay */
 304     /* don't include the ']' */
 305     name->end = self->ptr-1;
 306     return 1;
 307 }
 308
 309 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
 310 static int
 311 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
 312                        Py_ssize_t *name_idx, SubString *name)
 313 {
 314     /* check at end of input */
 315     if (self->ptr >= self->str.end)
 316         return 1;
 317
 318     switch (*self->ptr++) {
 319     case '.':
 320         *is_attribute = 1;
 321         if (_FieldNameIterator_attr(self, name) == 0)
 322             return 0;
 323         *name_idx = -1;
 324         break;
 325     case '[':
 326         *is_attribute = 0;
 327         if (_FieldNameIterator_item(self, name) == 0)
 328             return 0;
 329         *name_idx = get_integer(name);
 330         break;
 331     default:
 332         /* interal error, can't get here */
 333         assert(0);
 334         return 0;
 335     }
 336
 337     /* empty string is an error */
 338     if (name->ptr == name->end) {
 339         PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
 340         return 0;
 341     }
 342
 343     return 2;
 344 }
 345
 346
 347 /* input: field_name
 348    output: 'first' points to the part before the first '[' or '.'
 349            'first_idx' is -1 if 'first' is not an integer, otherwise
 350                        it's the value of first converted to an integer
 351            'rest' is an iterator to return the rest
 352 */
 353 static int
 354 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
 355                  Py_ssize_t *first_idx, FieldNameIterator *rest)
 356 {
 357     STRINGLIB_CHAR c;
 358     STRINGLIB_CHAR *p = ptr;
 359     STRINGLIB_CHAR *end = ptr + len;
 360
 361     /* find the part up until the first '.' or '[' */
 362     while (p < end) {
 363         switch (c = *p++) {
 364         case '[':
 365         case '.':
 366             /* backup so that we this character is available to the
 367                "rest" iterator */
 368             p--;
 369             break;
 370         default:
 371             continue;
 372         }
 373         break;
 374     }
 375
 376     /* set up the return values */
 377     SubString_init(first, ptr, p - ptr);
 378     FieldNameIterator_init(rest, p, end - p);
 379
 380     /* see if "first" is an integer, in which case it's used as an index */
 381     *first_idx = get_integer(first);
 382
 383     /* zero length string is an error */
 384     if (first->ptr >= first->end) {
 385         PyErr_SetString(PyExc_ValueError, "empty field name");
 386         goto error;
 387     }
 388
 389     return 1;
 390 error:
 391     return 0;
 392 }
 393
 394
 395 /*
 396     get_field_object returns the object inside {}, before the
 397     format_spec.  It handles getindex and getattr lookups and consumes
 398     the entire input string.
 399 */
 400 static PyObject *
 401 get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
 402 {
 403     PyObject *obj = NULL;
 404     int ok;
 405     int is_attribute;
 406     SubString name;
 407     SubString first;
 408     Py_ssize_t index;
 409     FieldNameIterator rest;
 410
 411     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
 412                           &index, &rest)) {
 413         goto error;
 414     }
 415
 416     if (index == -1) {
 417         /* look up in kwargs */
 418         PyObject *key = SubString_new_object(&first);
 419         if (key == NULL)
 420             goto error;
 421         if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
 422             PyErr_SetObject(PyExc_KeyError, key);
 423             Py_DECREF(key);
 424             goto error;
 425         }
 426         Py_DECREF(key);
 427         Py_INCREF(obj);
 428     }
 429     else {
 430         /* look up in args */
 431         obj = PySequence_GetItem(args, index);
 432         if (obj == NULL)
 433             goto error;
 434     }
 435
 436     /* iterate over the rest of the field_name */
 437     while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
 438                                         &name)) == 2) {
 439         PyObject *tmp;
 440
 441         if (is_attribute)
 442             /* getattr lookup "." */
 443             tmp = getattr(obj, &name);
 444         else
 445             /* getitem lookup "[]" */
 446             if (index == -1)
 447                 tmp = getitem_str(obj, &name);
 448             else
 449                 if (PySequence_Check(obj))
 450                     tmp = getitem_sequence(obj, index);
 451                 else
 452                     /* not a sequence */
 453                     tmp = getitem_idx(obj, index);
 454         if (tmp == NULL)
 455             goto error;
 456
 457         /* assign to obj */
 458         Py_DECREF(obj);
 459         obj = tmp;
 460     }
 461     /* end of iterator, this is the non-error case */
 462     if (ok == 1)
 463         return obj;
 464 error:
 465     Py_XDECREF(obj);
 466     return NULL;
 467 }
 468
 469 /************************************************************************/
 470 /*****************  Field rendering functions  **************************/
 471 /************************************************************************/
 472
 473 /*
 474     render_field() is the main function in this section.  It takes the
 475     field object and field specification string generated by
 476     get_field_and_spec, and renders the field into the output string.
 477
 478     render_field calls fieldobj.__format__(format_spec) method, and
 479     appends to the output.
 480 */
 481 static int
 482 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
 483 {
 484     int ok = 0;
 485     PyObject *result = NULL;
 486
 487     /* we need to create an object out of the pointers we have */
 488     PyObject *format_spec_object = SubString_new_object_or_empty(format_spec);
 489     if (format_spec_object == NULL)
 490         goto done;
 491
 492     result = PyObject_Format(fieldobj, format_spec_object);
 493     if (result == NULL)
 494         goto done;
 495
 496 #if PY_VERSION_HEX >= 0x03000000
 497     assert(PyUnicode_Check(result));
 498 #else
 499     assert(PyString_Check(result) || PyUnicode_Check(result));
 500
 501     /* Convert result to our type.  We could be str, and result could
 502        be unicode */
 503     {
 504         PyObject *tmp = STRINGLIB_TOSTR(result);
 505         if (tmp == NULL)
 506             goto done;
 507         Py_DECREF(result);
 508         result = tmp;
 509     }
 510 #endif
 511
 512     ok = output_data(output,
 513                      STRINGLIB_STR(result), STRINGLIB_LEN(result));
 514 done:
 515     Py_DECREF(format_spec_object);
 516     Py_XDECREF(result);
 517     return ok;
 518 }
 519
 520 static int
 521 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
 522             STRINGLIB_CHAR *conversion)
 523 {
 524     STRINGLIB_CHAR c = 0;
 525
 526     /* initialize these, as they may be empty */
 527     *conversion = '\0';
 528     SubString_init(format_spec, NULL, 0);
 529
 530     /* search for the field name.  it's terminated by the end of the
 531        string, or a ':' or '!' */
 532     field_name->ptr = str->ptr;
 533     while (str->ptr < str->end) {
 534         switch (c = *(str->ptr++)) {
 535         case ':':
 536         case '!':
 537             break;
 538         default:
 539             continue;
 540         }
 541         break;
 542     }
 543
 544     if (c == '!' || c == ':') {
 545         /* we have a format specifier and/or a conversion */
 546         /* don't include the last character */
 547         field_name->end = str->ptr-1;
 548
 549         /* the format specifier is the rest of the string */
 550         format_spec->ptr = str->ptr;
 551         format_spec->end = str->end;
 552
 553         /* see if there's a conversion specifier */
 554         if (c == '!') {
 555             /* there must be another character present */
 556             if (format_spec->ptr >= format_spec->end) {
 557                 PyErr_SetString(PyExc_ValueError,
 558                                 "end of format while looking for conversion "
 559                                 "specifier");
 560                 return 0;
 561             }
 562             *conversion = *(format_spec->ptr++);
 563
 564             /* if there is another character, it must be a colon */
 565             if (format_spec->ptr < format_spec->end) {
 566                 c = *(format_spec->ptr++);
 567                 if (c != ':') {
 568                     PyErr_SetString(PyExc_ValueError,
 569                                     "expected ':' after format specifier");
 570                     return 0;
 571                 }
 572             }
 573         }
 574
 575         return 1;
 576
 577     }
 578     else {
 579         /* end of string, there's no format_spec or conversion */
 580         field_name->end = str->ptr;
 581         return 1;
 582     }
 583 }
 584
 585 /************************************************************************/
 586 /******* Output string allocation and escape-to-markup processing  ******/
 587 /************************************************************************/
 588
 589 /* MarkupIterator breaks the string into pieces of either literal
 590    text, or things inside {} that need to be marked up.  it is
 591    designed to make it easy to wrap a Python iterator around it, for
 592    use with the Formatter class */
 593
 594 typedef struct {
 595     SubString str;
 596 } MarkupIterator;
 597
 598 static int
 599 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
 600 {
 601     SubString_init(&self->str, ptr, len);
 602     return 1;
 603 }
 604
 605 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
 606    string (or something to be expanded) */
 607 static int
 608 MarkupIterator_next(MarkupIterator *self, SubString *literal,
 609                     SubString *field_name, SubString *format_spec,
 610                     STRINGLIB_CHAR *conversion,
 611                     int *format_spec_needs_expanding)
 612 {
 613     int at_end;
 614     STRINGLIB_CHAR c = 0;
 615     STRINGLIB_CHAR *start;
 616     int count;
 617     Py_ssize_t len;
 618     int markup_follows = 0;
 619
 620     /* initialize all of the output variables */
 621     SubString_init(literal, NULL, 0);
 622     SubString_init(field_name, NULL, 0);
 623     SubString_init(format_spec, NULL, 0);
 624     *conversion = '\0';
 625     *format_spec_needs_expanding = 0;
 626
 627     /* No more input, end of iterator.  This is the normal exit
 628        path. */
 629     if (self->str.ptr >= self->str.end)
 630         return 1;
 631
 632     start = self->str.ptr;
 633
 634     /* First read any literal text. Read until the end of string, an
 635        escaped '{' or '}', or an unescaped '{'.  In order to never
 636        allocate memory and so I can just pass pointers around, if
 637        there's an escaped '{' or '}' then we'll return the literal
 638        including the brace, but no format object.  The next time
 639        through, we'll return the rest of the literal, skipping past
 640        the second consecutive brace. */
 641     while (self->str.ptr < self->str.end) {
 642         switch (c = *(self->str.ptr++)) {
 643         case '{':
 644         case '}':
 645             markup_follows = 1;
 646             break;
 647         default:
 648             continue;
 649         }
 650         break;
 651     }
 652
 653     at_end = self->str.ptr >= self->str.end;
 654     len = self->str.ptr - start;
 655
 656     if ((c == '}') && (at_end || (c != *self->str.ptr))) {
 657         PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
 658                         "in format string");
 659         return 0;
 660     }
 661     if (at_end && c == '{') {
 662         PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
 663                         "in format string");
 664         return 0;
 665     }
 666     if (!at_end) {
 667         if (c == *self->str.ptr) {
 668             /* escaped } or {, skip it in the input.  there is no
 669                markup object following us, just this literal text */
 670             self->str.ptr++;
 671             markup_follows = 0;
 672         }
 673         else
 674             len--;
 675     }
 676
 677     /* record the literal text */
 678     literal->ptr = start;
 679     literal->end = start + len;
 680
 681     if (!markup_follows)
 682         return 2;
 683
 684     /* this is markup, find the end of the string by counting nested
 685        braces.  note that this prohibits escaped braces, so that
 686        format_specs cannot have braces in them. */
 687     count = 1;
 688
 689     start = self->str.ptr;
 690
 691     /* we know we can't have a zero length string, so don't worry
 692        about that case */
 693     while (self->str.ptr < self->str.end) {
 694         switch (c = *(self->str.ptr++)) {
 695         case '{':
 696             /* the format spec needs to be recursively expanded.
 697                this is an optimization, and not strictly needed */
 698             *format_spec_needs_expanding = 1;
 699             count++;
 700             break;
 701         case '}':
 702             count--;
 703             if (count <= 0) {
 704                 /* we're done.  parse and get out */
 705                 SubString s;
 706
 707                 SubString_init(&s, start, self->str.ptr - 1 - start);
 708                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
 709                     return 0;
 710
 711                 /* a zero length field_name is an error */
 712                 if (field_name->ptr == field_name->end) {
 713                     PyErr_SetString(PyExc_ValueError, "zero length field name "
 714                                     "in format");
 715                     return 0;
 716                 }
 717
 718                 /* success */
 719                 return 2;
 720             }
 721             break;
 722         }
 723     }
 724
 725     /* end of string while searching for matching '}' */
 726     PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
 727     return 0;
 728 }
 729
 730
 731 /* do the !r or !s conversion on obj */
 732 static PyObject *
 733 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
 734 {
 735     /* XXX in pre-3.0, do we need to convert this to unicode, since it
 736        might have returned a string? */
 737     switch (conversion) {
 738     case 'r':
 739         return PyObject_Repr(obj);
 740     case 's':
 741         return STRINGLIB_TOSTR(obj);
 742     default:
 743         if (conversion > 32 && conversion < 127) {
 744                 /* It's the ASCII subrange; casting to char is safe
 745                    (assuming the execution character set is an ASCII
 746                    superset). */
 747                 PyErr_Format(PyExc_ValueError,
 748                      "Unknown conversion specifier %c",
 749                      (char)conversion);
 750         } else
 751                 PyErr_Format(PyExc_ValueError,
 752                      "Unknown conversion specifier \\x%x",
 753                      (unsigned int)conversion);
 754         return NULL;
 755     }
 756 }
 757
 758 /* given:
 759
 760    {field_name!conversion:format_spec}
 761
 762    compute the result and write it to output.
 763    format_spec_needs_expanding is an optimization.  if it's false,
 764    just output the string directly, otherwise recursively expand the
 765    format_spec string. */
 766
 767 static int
 768 output_markup(SubString *field_name, SubString *format_spec,
 769               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
 770               OutputString *output, PyObject *args, PyObject *kwargs,
 771               int recursion_depth)
 772 {
 773     PyObject *tmp = NULL;
 774     PyObject *fieldobj = NULL;
 775     SubString expanded_format_spec;
 776     SubString *actual_format_spec;
 777     int result = 0;
 778
 779     /* convert field_name to an object */
 780     fieldobj = get_field_object(field_name, args, kwargs);
 781     if (fieldobj == NULL)
 782         goto done;
 783
 784     if (conversion != '\0') {
 785         tmp = do_conversion(fieldobj, conversion);
 786         if (tmp == NULL)
 787             goto done;
 788
 789         /* do the assignment, transferring ownership: fieldobj = tmp */
 790         Py_DECREF(fieldobj);
 791         fieldobj = tmp;
 792         tmp = NULL;
 793     }
 794
 795     /* if needed, recurively compute the format_spec */
 796     if (format_spec_needs_expanding) {
 797         tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
 798         if (tmp == NULL)
 799             goto done;
 800
 801         /* note that in the case we're expanding the format string,
 802            tmp must be kept around until after the call to
 803            render_field. */
 804         SubString_init(&expanded_format_spec,
 805                        STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
 806         actual_format_spec = &expanded_format_spec;
 807     }
 808     else
 809         actual_format_spec = format_spec;
 810
 811     if (render_field(fieldobj, actual_format_spec, output) == 0)
 812         goto done;
 813
 814     result = 1;
 815
 816 done:
 817     Py_XDECREF(fieldobj);
 818     Py_XDECREF(tmp);
 819
 820     return result;
 821 }
 822
 823 /*
 824     do_markup is the top-level loop for the format() method.  It
 825     searches through the format string for escapes to markup codes, and
 826     calls other functions to move non-markup text to the output,
 827     and to perform the markup to the output.
 828 */
 829 static int
 830 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
 831           OutputString *output, int recursion_depth)
 832 {
 833     MarkupIterator iter;
 834     int format_spec_needs_expanding;
 835     int result;
 836     SubString literal;
 837     SubString field_name;
 838     SubString format_spec;
 839     STRINGLIB_CHAR conversion;
 840
 841     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
 842     while ((result = MarkupIterator_next(&iter, &literal, &field_name,
 843                                          &format_spec, &conversion,
 844                                          &format_spec_needs_expanding)) == 2) {
 845         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
 846             return 0;
 847         if (field_name.ptr != field_name.end)
 848             if (!output_markup(&field_name, &format_spec,
 849                                format_spec_needs_expanding, conversion, output,
 850                                args, kwargs, recursion_depth))
 851                 return 0;
 852     }
 853     return result;
 854 }
 855
 856
 857 /*
 858     build_string allocates the output string and then
 859     calls do_markup to do the heavy lifting.
 860 */
 861 static PyObject *
 862 build_string(SubString *input, PyObject *args, PyObject *kwargs,
 863              int recursion_depth)
 864 {
 865     OutputString output;
 866     PyObject *result = NULL;
 867     Py_ssize_t count;
 868
 869     output.obj = NULL; /* needed so cleanup code always works */
 870
 871     /* check the recursion level */
 872     if (recursion_depth <= 0) {
 873         PyErr_SetString(PyExc_ValueError,
 874                         "Max string recursion exceeded");
 875         goto done;
 876     }
 877
 878     /* initial size is the length of the format string, plus the size
 879        increment.  seems like a reasonable default */
 880     if (!output_initialize(&output,
 881                            input->end - input->ptr +
 882                            INITIAL_SIZE_INCREMENT))
 883         goto done;
 884
 885     if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
 886         goto done;
 887     }
 888
 889     count = output.ptr - STRINGLIB_STR(output.obj);
 890     if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
 891         goto done;
 892     }
 893
 894     /* transfer ownership to result */
 895     result = output.obj;
 896     output.obj = NULL;
 897
 898 done:
 899     Py_XDECREF(output.obj);
 900     return result;
 901 }
 902
 903 /************************************************************************/
 904 /*********** main routine ***********************************************/
 905 /************************************************************************/
 906
 907 /* this is the main entry point */
 908 static PyObject *
 909 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
 910 {
 911     SubString input;
 912
 913     /* PEP 3101 says only 2 levels, so that
 914        "{0:{1}}".format('abc', 's')            # works
 915        "{0:{1:{2}}}".format('abc', 's', '')    # fails
 916     */
 917     int recursion_depth = 2;
 918
 919     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
 920     return build_string(&input, args, kwargs, recursion_depth);
 921 }
 922
 923
 924
 925 /************************************************************************/
 926 /*********** formatteriterator ******************************************/
 927 /************************************************************************/
 928
 929 /* This is used to implement string.Formatter.vparse().  It exists so
 930    Formatter can share code with the built in unicode.format() method.
 931    It's really just a wrapper around MarkupIterator that is callable
 932    from Python. */
 933
 934 typedef struct {
 935     PyObject_HEAD
 936
 937     STRINGLIB_OBJECT *str;
 938
 939     MarkupIterator it_markup;
 940 } formatteriterobject;
 941
 942 static void
 943 formatteriter_dealloc(formatteriterobject *it)
 944 {
 945     Py_XDECREF(it->str);
 946     PyObject_FREE(it);
 947 }
 948
 949 /* returns a tuple:
 950    (literal, field_name, format_spec, conversion)
 951
 952    literal is any literal text to output.  might be zero length
 953    field_name is the string before the ':'.  might be None
 954    format_spec is the string after the ':'.  mibht be None
 955    conversion is either None, or the string after the '!'
 956 */
 957 static PyObject *
 958 formatteriter_next(formatteriterobject *it)
 959 {
 960     SubString literal;
 961     SubString field_name;
 962     SubString format_spec;
 963     STRINGLIB_CHAR conversion;
 964     int format_spec_needs_expanding;
 965     int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
 966                                      &format_spec, &conversion,
 967                                      &format_spec_needs_expanding);
 968
 969     /* all of the SubString objects point into it->str, so no
 970        memory management needs to be done on them */
 971     assert(0 <= result && result <= 2);
 972     if (result == 0 || result == 1)
 973         /* if 0, error has already been set, if 1, iterator is empty */
 974         return NULL;
 975     else {
 976         PyObject *literal_str = NULL;
 977         PyObject *field_name_str = NULL;
 978         PyObject *format_spec_str = NULL;
 979         PyObject *conversion_str = NULL;
 980         PyObject *tuple = NULL;
 981         int has_field = field_name.ptr != field_name.end;
 982
 983         literal_str = SubString_new_object(&literal);
 984         if (literal_str == NULL)
 985             goto done;
 986
 987         field_name_str = SubString_new_object(&field_name);
 988         if (field_name_str == NULL)
 989             goto done;
 990
 991         /* if field_name is non-zero length, return a string for
 992            format_spec (even if zero length), else return None */
 993         format_spec_str = (has_field ?
 994                            SubString_new_object_or_empty :
 995                            SubString_new_object)(&format_spec);
 996         if (format_spec_str == NULL)
 997             goto done;
 998
 999         /* if the conversion is not specified, return a None,
1000            otherwise create a one length string with the conversion
1001            character */
1002         if (conversion == '\0') {
1003             conversion_str = Py_None;
1004             Py_INCREF(conversion_str);
1005         }
1006         else
1007             conversion_str = STRINGLIB_NEW(&conversion, 1);
1008         if (conversion_str == NULL)
1009             goto done;
1010
1011         tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1012                              conversion_str);
1013     done:
1014         Py_XDECREF(literal_str);
1015         Py_XDECREF(field_name_str);
1016         Py_XDECREF(format_spec_str);
1017         Py_XDECREF(conversion_str);
1018         return tuple;
1019     }
1020 }
1021
/* Method table for formatteriterator.  It defines no methods: the
   only entry is the terminating sentinel. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1025
/* Type object for "formatteriterator".  The slots are positional, so
   their order must not change.  The type is its own iterator
   (tp_iter is PyObject_SelfIter) and produces items through
   formatteriter_next(); tp_traverse and tp_clear are left 0. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,                                  /* tp_members */
};
1058
1059 /* unicode_formatter_parser is used to implement
1060    string.Formatter.vformat.  it parses a string and returns tuples
1061    describing the parsed elements.  It's a wrapper around
1062    stringlib/string_format.h's MarkupIterator */
1063 static PyObject *
1064 formatter_parser(STRINGLIB_OBJECT *self)
1065 {
1066     formatteriterobject *it;
1067
1068     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1069     if (it == NULL)
1070         return NULL;
1071
1072     /* take ownership, give the object to the iterator */
1073     Py_INCREF(self);
1074     it->str = self;
1075
1076     /* initialize the contained MarkupIterator */
1077     MarkupIterator_init(&it->it_markup,
1078                         STRINGLIB_STR(self),
1079                         STRINGLIB_LEN(self));
1080
1081     return (PyObject *)it;
1082 }
1083
1084
1085 /************************************************************************/
1086 /*********** fieldnameiterator ******************************************/
1087 /************************************************************************/
1088
1089
/* This is used to implement string.Formatter.get_field().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator. */
1093
/* Instance layout of the Python-level "fieldnameiterator" object: a
   FieldNameIterator plus a reference to the field-name string it
   walks over. */
typedef struct {
    PyObject_HEAD

    /* reference to the field-name string; stored by
       formatter_field_name_split() and released by
       fieldnameiter_dealloc() */
    STRINGLIB_OBJECT *str;

    /* iteration state; filled in by field_name_split() when the
       object is created in formatter_field_name_split() */
    FieldNameIterator it_field;
} fieldnameiterobject;
1101
1102 static void
1103 fieldnameiter_dealloc(fieldnameiterobject *it)
1104 {
1105     Py_XDECREF(it->str);
1106     PyObject_FREE(it);
1107 }
1108
1109 /* returns a tuple:
1110    (is_attr, value)
1111    is_attr is true if we used attribute syntax (e.g., '.foo')
1112               false if we used index syntax (e.g., '[foo]')
1113    value is an integer or string
1114 */
1115 static PyObject *
1116 fieldnameiter_next(fieldnameiterobject *it)
1117 {
1118     int result;
1119     int is_attr;
1120     Py_ssize_t idx;
1121     SubString name;
1122
1123     result = FieldNameIterator_next(&it->it_field, &is_attr,
1124                                     &idx, &name);
1125     if (result == 0 || result == 1)
1126         /* if 0, error has already been set, if 1, iterator is empty */
1127         return NULL;
1128     else {
1129         PyObject* result = NULL;
1130         PyObject* is_attr_obj = NULL;
1131         PyObject* obj = NULL;
1132
1133         is_attr_obj = PyBool_FromLong(is_attr);
1134         if (is_attr_obj == NULL)
1135             goto done;
1136
1137         /* either an integer or a string */
1138         if (idx != -1)
1139             obj = PyLong_FromSsize_t(idx);
1140         else
1141             obj = SubString_new_object(&name);
1142         if (obj == NULL)
1143             goto done;
1144
1145         /* return a tuple of values */
1146         result = PyTuple_Pack(2, is_attr_obj, obj);
1147
1148     done:
1149         Py_XDECREF(is_attr_obj);
1150         Py_XDECREF(obj);
1151         return result;
1152     }
1153 }
1154
/* Method table for fieldnameiterator.  It defines no methods: the
   only entry is the terminating sentinel. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1158
/* Type object for "fieldnameiterator".  The slots are positional, so
   their order must not change.  The type is its own iterator
   (tp_iter is PyObject_SelfIter) and produces items through
   fieldnameiter_next(); tp_traverse and tp_clear are left 0. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    0};                                 /* tp_members */
1190
1191 /* unicode_formatter_field_name_split is used to implement
1192    string.Formatter.vformat.  it takes an PEP 3101 "field name", and
1193    returns a tuple of (first, rest): "first", the part before the
1194    first '.' or '['; and "rest", an iterator for the rest of the field
1195    name.  it's a wrapper around stringlib/string_format.h's
1196    field_name_split.  The iterator it returns is a
1197    FieldNameIterator */
1198 static PyObject *
1199 formatter_field_name_split(STRINGLIB_OBJECT *self)
1200 {
1201     SubString first;
1202     Py_ssize_t first_idx;
1203     fieldnameiterobject *it;
1204
1205     PyObject *first_obj = NULL;
1206     PyObject *result = NULL;
1207
1208     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1209     if (it == NULL)
1210         return NULL;
1211
1212     /* take ownership, give the object to the iterator.  this is
1213        just to keep the field_name alive */
1214     Py_INCREF(self);
1215     it->str = self;
1216
1217     if (!field_name_split(STRINGLIB_STR(self),
1218                           STRINGLIB_LEN(self),
1219                           &first, &first_idx, &it->it_field))
1220         goto done;
1221
1222     /* first becomes an integer, if possible; else a string */
1223     if (first_idx != -1)
1224         first_obj = PyLong_FromSsize_t(first_idx);
1225     else
1226         /* convert "first" into a string object */
1227         first_obj = SubString_new_object(&first);
1228     if (first_obj == NULL)
1229         goto done;
1230
1231     /* return a tuple of values */
1232     result = PyTuple_Pack(2, first_obj, it);
1233
1234 done:
1235     Py_XDECREF(it);
1236     Py_XDECREF(first_obj);
1237     return result;
1238 }