Use richer assertions in test_mailbox (for better failure messages).
[python.git] / Objects / stringlib / string_format.h
blobee6533e44b248d96c14a46c11d59b91513828ea0
1 /*
2 string_format.h -- implementation of string.format().
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6 */
/* Defines for Python 2.6 compatibility */
10 #if PY_VERSION_HEX < 0x03000000
11 #define PyLong_FromSsize_t _PyLong_FromSsize_t
12 #endif
14 /* Defines for more efficiently reallocating the string buffer */
15 #define INITIAL_SIZE_INCREMENT 100
16 #define SIZE_MULTIPLIER 2
17 #define MAX_SIZE_INCREMENT 3200
20 /************************************************************************/
21 /*********** Global data structures and forward declarations *********/
22 /************************************************************************/
25 A SubString consists of the characters between two string or
26 unicode pointers.
28 typedef struct {
29 STRINGLIB_CHAR *ptr;
30 STRINGLIB_CHAR *end;
31 } SubString;
/* Tracks whether the format string uses automatic or manual field
   numbering. */
typedef enum {
    ANS_INIT,       /* no numbered field has been seen yet */
    ANS_AUTO,       /* fields are being numbered automatically ("{}") */
    ANS_MANUAL      /* fields carry explicit numbers ("{0}") */
} AutoNumberState;
40 /* Keeps track of our auto-numbering state, and which number field we're on */
41 typedef struct {
42 AutoNumberState an_state;
43 int an_field_number;
44 } AutoNumber;
47 /* forward declaration for recursion */
48 static PyObject *
49 build_string(SubString *input, PyObject *args, PyObject *kwargs,
50 int recursion_depth, AutoNumber *auto_number);
54 /************************************************************************/
55 /************************** Utility functions ************************/
56 /************************************************************************/
58 static void
59 AutoNumber_Init(AutoNumber *auto_number)
61 auto_number->an_state = ANS_INIT;
62 auto_number->an_field_number = 0;
65 /* fill in a SubString from a pointer and length */
66 Py_LOCAL_INLINE(void)
67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
69 str->ptr = p;
70 if (p == NULL)
71 str->end = NULL;
72 else
73 str->end = str->ptr + len;
76 /* return a new string. if str->ptr is NULL, return None */
77 Py_LOCAL_INLINE(PyObject *)
78 SubString_new_object(SubString *str)
80 if (str->ptr == NULL) {
81 Py_INCREF(Py_None);
82 return Py_None;
84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
87 /* return a new string. if str->ptr is NULL, return None */
88 Py_LOCAL_INLINE(PyObject *)
89 SubString_new_object_or_empty(SubString *str)
91 if (str->ptr == NULL) {
92 return STRINGLIB_NEW(NULL, 0);
94 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
97 /* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
100 static int
101 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
103 if (state == ANS_MANUAL) {
104 if (field_name_is_empty) {
105 PyErr_SetString(PyExc_ValueError, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
108 return 1;
111 else {
112 if (!field_name_is_empty) {
113 PyErr_SetString(PyExc_ValueError, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
116 return 1;
119 return 0;
123 /************************************************************************/
124 /*********** Output string management functions ****************/
125 /************************************************************************/
127 typedef struct {
128 STRINGLIB_CHAR *ptr;
129 STRINGLIB_CHAR *end;
130 PyObject *obj;
131 Py_ssize_t size_increment;
132 } OutputString;
134 /* initialize an OutputString object, reserving size characters */
135 static int
136 output_initialize(OutputString *output, Py_ssize_t size)
138 output->obj = STRINGLIB_NEW(NULL, size);
139 if (output->obj == NULL)
140 return 0;
142 output->ptr = STRINGLIB_STR(output->obj);
143 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144 output->size_increment = INITIAL_SIZE_INCREMENT;
146 return 1;
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
152 1 for success.
155 static int
156 output_extend(OutputString *output, Py_ssize_t count)
158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159 Py_ssize_t curlen = output->ptr - startptr;
160 Py_ssize_t maxlen = curlen + count + output->size_increment;
162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163 return 0;
164 startptr = STRINGLIB_STR(output->obj);
165 output->ptr = startptr + curlen;
166 output->end = startptr + maxlen;
167 if (output->size_increment < MAX_SIZE_INCREMENT)
168 output->size_increment *= SIZE_MULTIPLIER;
169 return 1;
173 output_data dumps characters into our output string
174 buffer.
176 In some cases, it has to reallocate the string.
178 It returns a status: 0 for a failed reallocation,
179 1 for success.
181 static int
182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
184 if ((count > output->end - output->ptr) && !output_extend(output, count))
185 return 0;
186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187 output->ptr += count;
188 return 1;
191 /************************************************************************/
192 /*********** Format string parsing -- integers and identifiers *********/
193 /************************************************************************/
195 static Py_ssize_t
196 get_integer(const SubString *str)
198 Py_ssize_t accumulator = 0;
199 Py_ssize_t digitval;
200 Py_ssize_t oldaccumulator;
201 STRINGLIB_CHAR *p;
203 /* empty string is an error */
204 if (str->ptr >= str->end)
205 return -1;
207 for (p = str->ptr; p < str->end; p++) {
208 digitval = STRINGLIB_TODECIMAL(*p);
209 if (digitval < 0)
210 return -1;
212 This trick was copied from old Unicode format code. It's cute,
213 but would really suck on an old machine with a slow divide
214 implementation. Fortunately, in the normal case we do not
215 expect too many digits.
217 oldaccumulator = accumulator;
218 accumulator *= 10;
219 if ((accumulator+10)/10 != oldaccumulator+1) {
220 PyErr_Format(PyExc_ValueError,
221 "Too many decimal digits in format string");
222 return -1;
224 accumulator += digitval;
226 return accumulator;
229 /************************************************************************/
230 /******** Functions to get field objects and specification strings ******/
231 /************************************************************************/
233 /* do the equivalent of obj.name */
234 static PyObject *
235 getattr(PyObject *obj, SubString *name)
237 PyObject *newobj;
238 PyObject *str = SubString_new_object(name);
239 if (str == NULL)
240 return NULL;
241 newobj = PyObject_GetAttr(obj, str);
242 Py_DECREF(str);
243 return newobj;
246 /* do the equivalent of obj[idx], where obj is a sequence */
247 static PyObject *
248 getitem_sequence(PyObject *obj, Py_ssize_t idx)
250 return PySequence_GetItem(obj, idx);
253 /* do the equivalent of obj[idx], where obj is not a sequence */
254 static PyObject *
255 getitem_idx(PyObject *obj, Py_ssize_t idx)
257 PyObject *newobj;
258 PyObject *idx_obj = PyLong_FromSsize_t(idx);
259 if (idx_obj == NULL)
260 return NULL;
261 newobj = PyObject_GetItem(obj, idx_obj);
262 Py_DECREF(idx_obj);
263 return newobj;
266 /* do the equivalent of obj[name] */
267 static PyObject *
268 getitem_str(PyObject *obj, SubString *name)
270 PyObject *newobj;
271 PyObject *str = SubString_new_object(name);
272 if (str == NULL)
273 return NULL;
274 newobj = PyObject_GetItem(obj, str);
275 Py_DECREF(str);
276 return newobj;
279 typedef struct {
280 /* the entire string we're parsing. we assume that someone else
281 is managing its lifetime, and that it will exist for the
282 lifetime of the iterator. can be empty */
283 SubString str;
285 /* pointer to where we are inside field_name */
286 STRINGLIB_CHAR *ptr;
287 } FieldNameIterator;
290 static int
291 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
292 Py_ssize_t len)
294 SubString_init(&self->str, ptr, len);
295 self->ptr = self->str.ptr;
296 return 1;
299 static int
300 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
302 STRINGLIB_CHAR c;
304 name->ptr = self->ptr;
306 /* return everything until '.' or '[' */
307 while (self->ptr < self->str.end) {
308 switch (c = *self->ptr++) {
309 case '[':
310 case '.':
311 /* backup so that we this character will be seen next time */
312 self->ptr--;
313 break;
314 default:
315 continue;
317 break;
319 /* end of string is okay */
320 name->end = self->ptr;
321 return 1;
324 static int
325 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
327 int bracket_seen = 0;
328 STRINGLIB_CHAR c;
330 name->ptr = self->ptr;
332 /* return everything until ']' */
333 while (self->ptr < self->str.end) {
334 switch (c = *self->ptr++) {
335 case ']':
336 bracket_seen = 1;
337 break;
338 default:
339 continue;
341 break;
343 /* make sure we ended with a ']' */
344 if (!bracket_seen) {
345 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
346 return 0;
349 /* end of string is okay */
350 /* don't include the ']' */
351 name->end = self->ptr-1;
352 return 1;
355 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
356 static int
357 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
358 Py_ssize_t *name_idx, SubString *name)
360 /* check at end of input */
361 if (self->ptr >= self->str.end)
362 return 1;
364 switch (*self->ptr++) {
365 case '.':
366 *is_attribute = 1;
367 if (_FieldNameIterator_attr(self, name) == 0)
368 return 0;
369 *name_idx = -1;
370 break;
371 case '[':
372 *is_attribute = 0;
373 if (_FieldNameIterator_item(self, name) == 0)
374 return 0;
375 *name_idx = get_integer(name);
376 break;
377 default:
378 /* Invalid character follows ']' */
379 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
380 "follow ']' in format field specifier");
381 return 0;
384 /* empty string is an error */
385 if (name->ptr == name->end) {
386 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
387 return 0;
390 return 2;
394 /* input: field_name
395 output: 'first' points to the part before the first '[' or '.'
396 'first_idx' is -1 if 'first' is not an integer, otherwise
397 it's the value of first converted to an integer
398 'rest' is an iterator to return the rest
400 static int
401 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
402 Py_ssize_t *first_idx, FieldNameIterator *rest,
403 AutoNumber *auto_number)
405 STRINGLIB_CHAR c;
406 STRINGLIB_CHAR *p = ptr;
407 STRINGLIB_CHAR *end = ptr + len;
408 int field_name_is_empty;
409 int using_numeric_index;
411 /* find the part up until the first '.' or '[' */
412 while (p < end) {
413 switch (c = *p++) {
414 case '[':
415 case '.':
416 /* backup so that we this character is available to the
417 "rest" iterator */
418 p--;
419 break;
420 default:
421 continue;
423 break;
426 /* set up the return values */
427 SubString_init(first, ptr, p - ptr);
428 FieldNameIterator_init(rest, p, end - p);
430 /* see if "first" is an integer, in which case it's used as an index */
431 *first_idx = get_integer(first);
433 field_name_is_empty = first->ptr >= first->end;
435 /* If the field name is omitted or if we have a numeric index
436 specified, then we're doing numeric indexing into args. */
437 using_numeric_index = field_name_is_empty || *first_idx != -1;
439 /* We always get here exactly one time for each field we're
440 processing. And we get here in field order (counting by left
441 braces). So this is the perfect place to handle automatic field
442 numbering if the field name is omitted. */
444 /* Check if we need to do the auto-numbering. It's not needed if
445 we're called from string.Format routines, because it's handled
446 in that class by itself. */
447 if (auto_number) {
448 /* Initialize our auto numbering state if this is the first
449 time we're either auto-numbering or manually numbering. */
450 if (auto_number->an_state == ANS_INIT && using_numeric_index)
451 auto_number->an_state = field_name_is_empty ?
452 ANS_AUTO : ANS_MANUAL;
454 /* Make sure our state is consistent with what we're doing
455 this time through. Only check if we're using a numeric
456 index. */
457 if (using_numeric_index)
458 if (autonumber_state_error(auto_number->an_state,
459 field_name_is_empty))
460 return 0;
461 /* Zero length field means we want to do auto-numbering of the
462 fields. */
463 if (field_name_is_empty)
464 *first_idx = (auto_number->an_field_number)++;
467 return 1;
472 get_field_object returns the object inside {}, before the
473 format_spec. It handles getindex and getattr lookups and consumes
474 the entire input string.
476 static PyObject *
477 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
478 AutoNumber *auto_number)
480 PyObject *obj = NULL;
481 int ok;
482 int is_attribute;
483 SubString name;
484 SubString first;
485 Py_ssize_t index;
486 FieldNameIterator rest;
488 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
489 &index, &rest, auto_number)) {
490 goto error;
493 if (index == -1) {
494 /* look up in kwargs */
495 PyObject *key = SubString_new_object(&first);
496 if (key == NULL)
497 goto error;
498 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
499 PyErr_SetObject(PyExc_KeyError, key);
500 Py_DECREF(key);
501 goto error;
503 Py_DECREF(key);
504 Py_INCREF(obj);
506 else {
507 /* look up in args */
508 obj = PySequence_GetItem(args, index);
509 if (obj == NULL)
510 goto error;
513 /* iterate over the rest of the field_name */
514 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
515 &name)) == 2) {
516 PyObject *tmp;
518 if (is_attribute)
519 /* getattr lookup "." */
520 tmp = getattr(obj, &name);
521 else
522 /* getitem lookup "[]" */
523 if (index == -1)
524 tmp = getitem_str(obj, &name);
525 else
526 if (PySequence_Check(obj))
527 tmp = getitem_sequence(obj, index);
528 else
529 /* not a sequence */
530 tmp = getitem_idx(obj, index);
531 if (tmp == NULL)
532 goto error;
534 /* assign to obj */
535 Py_DECREF(obj);
536 obj = tmp;
538 /* end of iterator, this is the non-error case */
539 if (ok == 1)
540 return obj;
541 error:
542 Py_XDECREF(obj);
543 return NULL;
546 /************************************************************************/
547 /***************** Field rendering functions **************************/
548 /************************************************************************/
551 render_field() is the main function in this section. It takes the
552 field object and field specification string generated by
553 get_field_and_spec, and renders the field into the output string.
555 render_field calls fieldobj.__format__(format_spec) method, and
556 appends to the output.
558 static int
559 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
561 int ok = 0;
562 PyObject *result = NULL;
563 PyObject *format_spec_object = NULL;
564 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
565 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
566 format_spec->ptr : NULL;
567 Py_ssize_t format_spec_len = format_spec->ptr ?
568 format_spec->end - format_spec->ptr : 0;
570 /* If we know the type exactly, skip the lookup of __format__ and just
571 call the formatter directly. */
572 #if STRINGLIB_IS_UNICODE
573 if (PyUnicode_CheckExact(fieldobj))
574 formatter = _PyUnicode_FormatAdvanced;
575 /* Unfortunately, there's a problem with checking for int, long,
576 and float here. If we're being included as unicode, their
577 formatters expect string format_spec args. For now, just skip
578 this optimization for unicode. This could be fixed, but it's a
579 hassle. */
580 #else
581 if (PyString_CheckExact(fieldobj))
582 formatter = _PyBytes_FormatAdvanced;
583 else if (PyInt_CheckExact(fieldobj))
584 formatter =_PyInt_FormatAdvanced;
585 else if (PyLong_CheckExact(fieldobj))
586 formatter =_PyLong_FormatAdvanced;
587 else if (PyFloat_CheckExact(fieldobj))
588 formatter = _PyFloat_FormatAdvanced;
589 #endif
591 if (formatter) {
592 /* we know exactly which formatter will be called when __format__ is
593 looked up, so call it directly, instead. */
594 result = formatter(fieldobj, format_spec_start, format_spec_len);
596 else {
597 /* We need to create an object out of the pointers we have, because
598 __format__ takes a string/unicode object for format_spec. */
599 format_spec_object = STRINGLIB_NEW(format_spec_start,
600 format_spec_len);
601 if (format_spec_object == NULL)
602 goto done;
604 result = PyObject_Format(fieldobj, format_spec_object);
606 if (result == NULL)
607 goto done;
609 #if PY_VERSION_HEX >= 0x03000000
610 assert(PyUnicode_Check(result));
611 #else
612 assert(PyString_Check(result) || PyUnicode_Check(result));
614 /* Convert result to our type. We could be str, and result could
615 be unicode */
617 PyObject *tmp = STRINGLIB_TOSTR(result);
618 if (tmp == NULL)
619 goto done;
620 Py_DECREF(result);
621 result = tmp;
623 #endif
625 ok = output_data(output,
626 STRINGLIB_STR(result), STRINGLIB_LEN(result));
627 done:
628 Py_XDECREF(format_spec_object);
629 Py_XDECREF(result);
630 return ok;
633 static int
634 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
635 STRINGLIB_CHAR *conversion)
637 /* Note this function works if the field name is zero length,
638 which is good. Zero length field names are handled later, in
639 field_name_split. */
641 STRINGLIB_CHAR c = 0;
643 /* initialize these, as they may be empty */
644 *conversion = '\0';
645 SubString_init(format_spec, NULL, 0);
647 /* Search for the field name. it's terminated by the end of
648 the string, or a ':' or '!' */
649 field_name->ptr = str->ptr;
650 while (str->ptr < str->end) {
651 switch (c = *(str->ptr++)) {
652 case ':':
653 case '!':
654 break;
655 default:
656 continue;
658 break;
661 if (c == '!' || c == ':') {
662 /* we have a format specifier and/or a conversion */
663 /* don't include the last character */
664 field_name->end = str->ptr-1;
666 /* the format specifier is the rest of the string */
667 format_spec->ptr = str->ptr;
668 format_spec->end = str->end;
670 /* see if there's a conversion specifier */
671 if (c == '!') {
672 /* there must be another character present */
673 if (format_spec->ptr >= format_spec->end) {
674 PyErr_SetString(PyExc_ValueError,
675 "end of format while looking for conversion "
676 "specifier");
677 return 0;
679 *conversion = *(format_spec->ptr++);
681 /* if there is another character, it must be a colon */
682 if (format_spec->ptr < format_spec->end) {
683 c = *(format_spec->ptr++);
684 if (c != ':') {
685 PyErr_SetString(PyExc_ValueError,
686 "expected ':' after format specifier");
687 return 0;
692 else
693 /* end of string, there's no format_spec or conversion */
694 field_name->end = str->ptr;
696 return 1;
699 /************************************************************************/
700 /******* Output string allocation and escape-to-markup processing ******/
701 /************************************************************************/
703 /* MarkupIterator breaks the string into pieces of either literal
704 text, or things inside {} that need to be marked up. it is
705 designed to make it easy to wrap a Python iterator around it, for
706 use with the Formatter class */
708 typedef struct {
709 SubString str;
710 } MarkupIterator;
712 static int
713 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
715 SubString_init(&self->str, ptr, len);
716 return 1;
719 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
720 string (or something to be expanded) */
721 static int
722 MarkupIterator_next(MarkupIterator *self, SubString *literal,
723 int *field_present, SubString *field_name,
724 SubString *format_spec, STRINGLIB_CHAR *conversion,
725 int *format_spec_needs_expanding)
727 int at_end;
728 STRINGLIB_CHAR c = 0;
729 STRINGLIB_CHAR *start;
730 int count;
731 Py_ssize_t len;
732 int markup_follows = 0;
734 /* initialize all of the output variables */
735 SubString_init(literal, NULL, 0);
736 SubString_init(field_name, NULL, 0);
737 SubString_init(format_spec, NULL, 0);
738 *conversion = '\0';
739 *format_spec_needs_expanding = 0;
740 *field_present = 0;
742 /* No more input, end of iterator. This is the normal exit
743 path. */
744 if (self->str.ptr >= self->str.end)
745 return 1;
747 start = self->str.ptr;
749 /* First read any literal text. Read until the end of string, an
750 escaped '{' or '}', or an unescaped '{'. In order to never
751 allocate memory and so I can just pass pointers around, if
752 there's an escaped '{' or '}' then we'll return the literal
753 including the brace, but no format object. The next time
754 through, we'll return the rest of the literal, skipping past
755 the second consecutive brace. */
756 while (self->str.ptr < self->str.end) {
757 switch (c = *(self->str.ptr++)) {
758 case '{':
759 case '}':
760 markup_follows = 1;
761 break;
762 default:
763 continue;
765 break;
768 at_end = self->str.ptr >= self->str.end;
769 len = self->str.ptr - start;
771 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
772 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
773 "in format string");
774 return 0;
776 if (at_end && c == '{') {
777 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
778 "in format string");
779 return 0;
781 if (!at_end) {
782 if (c == *self->str.ptr) {
783 /* escaped } or {, skip it in the input. there is no
784 markup object following us, just this literal text */
785 self->str.ptr++;
786 markup_follows = 0;
788 else
789 len--;
792 /* record the literal text */
793 literal->ptr = start;
794 literal->end = start + len;
796 if (!markup_follows)
797 return 2;
799 /* this is markup, find the end of the string by counting nested
800 braces. note that this prohibits escaped braces, so that
801 format_specs cannot have braces in them. */
802 *field_present = 1;
803 count = 1;
805 start = self->str.ptr;
807 /* we know we can't have a zero length string, so don't worry
808 about that case */
809 while (self->str.ptr < self->str.end) {
810 switch (c = *(self->str.ptr++)) {
811 case '{':
812 /* the format spec needs to be recursively expanded.
813 this is an optimization, and not strictly needed */
814 *format_spec_needs_expanding = 1;
815 count++;
816 break;
817 case '}':
818 count--;
819 if (count <= 0) {
820 /* we're done. parse and get out */
821 SubString s;
823 SubString_init(&s, start, self->str.ptr - 1 - start);
824 if (parse_field(&s, field_name, format_spec, conversion) == 0)
825 return 0;
827 /* success */
828 return 2;
830 break;
834 /* end of string while searching for matching '}' */
835 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
836 return 0;
840 /* do the !r or !s conversion on obj */
841 static PyObject *
842 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
844 /* XXX in pre-3.0, do we need to convert this to unicode, since it
845 might have returned a string? */
846 switch (conversion) {
847 case 'r':
848 return PyObject_Repr(obj);
849 case 's':
850 return STRINGLIB_TOSTR(obj);
851 default:
852 if (conversion > 32 && conversion < 127) {
853 /* It's the ASCII subrange; casting to char is safe
854 (assuming the execution character set is an ASCII
855 superset). */
856 PyErr_Format(PyExc_ValueError,
857 "Unknown conversion specifier %c",
858 (char)conversion);
859 } else
860 PyErr_Format(PyExc_ValueError,
861 "Unknown conversion specifier \\x%x",
862 (unsigned int)conversion);
863 return NULL;
867 /* given:
869 {field_name!conversion:format_spec}
871 compute the result and write it to output.
872 format_spec_needs_expanding is an optimization. if it's false,
873 just output the string directly, otherwise recursively expand the
874 format_spec string.
876 field_name is allowed to be zero length, in which case we
877 are doing auto field numbering.
880 static int
881 output_markup(SubString *field_name, SubString *format_spec,
882 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
883 OutputString *output, PyObject *args, PyObject *kwargs,
884 int recursion_depth, AutoNumber *auto_number)
886 PyObject *tmp = NULL;
887 PyObject *fieldobj = NULL;
888 SubString expanded_format_spec;
889 SubString *actual_format_spec;
890 int result = 0;
892 /* convert field_name to an object */
893 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
894 if (fieldobj == NULL)
895 goto done;
897 if (conversion != '\0') {
898 tmp = do_conversion(fieldobj, conversion);
899 if (tmp == NULL)
900 goto done;
902 /* do the assignment, transferring ownership: fieldobj = tmp */
903 Py_DECREF(fieldobj);
904 fieldobj = tmp;
905 tmp = NULL;
908 /* if needed, recurively compute the format_spec */
909 if (format_spec_needs_expanding) {
910 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
911 auto_number);
912 if (tmp == NULL)
913 goto done;
915 /* note that in the case we're expanding the format string,
916 tmp must be kept around until after the call to
917 render_field. */
918 SubString_init(&expanded_format_spec,
919 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
920 actual_format_spec = &expanded_format_spec;
922 else
923 actual_format_spec = format_spec;
925 if (render_field(fieldobj, actual_format_spec, output) == 0)
926 goto done;
928 result = 1;
930 done:
931 Py_XDECREF(fieldobj);
932 Py_XDECREF(tmp);
934 return result;
938 do_markup is the top-level loop for the format() method. It
939 searches through the format string for escapes to markup codes, and
940 calls other functions to move non-markup text to the output,
941 and to perform the markup to the output.
943 static int
944 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
945 OutputString *output, int recursion_depth, AutoNumber *auto_number)
947 MarkupIterator iter;
948 int format_spec_needs_expanding;
949 int result;
950 int field_present;
951 SubString literal;
952 SubString field_name;
953 SubString format_spec;
954 STRINGLIB_CHAR conversion;
956 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
957 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
958 &field_name, &format_spec,
959 &conversion,
960 &format_spec_needs_expanding)) == 2) {
961 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
962 return 0;
963 if (field_present)
964 if (!output_markup(&field_name, &format_spec,
965 format_spec_needs_expanding, conversion, output,
966 args, kwargs, recursion_depth, auto_number))
967 return 0;
969 return result;
974 build_string allocates the output string and then
975 calls do_markup to do the heavy lifting.
977 static PyObject *
978 build_string(SubString *input, PyObject *args, PyObject *kwargs,
979 int recursion_depth, AutoNumber *auto_number)
981 OutputString output;
982 PyObject *result = NULL;
983 Py_ssize_t count;
985 output.obj = NULL; /* needed so cleanup code always works */
987 /* check the recursion level */
988 if (recursion_depth <= 0) {
989 PyErr_SetString(PyExc_ValueError,
990 "Max string recursion exceeded");
991 goto done;
994 /* initial size is the length of the format string, plus the size
995 increment. seems like a reasonable default */
996 if (!output_initialize(&output,
997 input->end - input->ptr +
998 INITIAL_SIZE_INCREMENT))
999 goto done;
1001 if (!do_markup(input, args, kwargs, &output, recursion_depth,
1002 auto_number)) {
1003 goto done;
1006 count = output.ptr - STRINGLIB_STR(output.obj);
1007 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1008 goto done;
1011 /* transfer ownership to result */
1012 result = output.obj;
1013 output.obj = NULL;
1015 done:
1016 Py_XDECREF(output.obj);
1017 return result;
1020 /************************************************************************/
1021 /*********** main routine ***********************************************/
1022 /************************************************************************/
1024 /* this is the main entry point */
1025 static PyObject *
1026 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1028 SubString input;
1030 /* PEP 3101 says only 2 levels, so that
1031 "{0:{1}}".format('abc', 's') # works
1032 "{0:{1:{2}}}".format('abc', 's', '') # fails
1034 int recursion_depth = 2;
1036 AutoNumber auto_number;
1038 AutoNumber_Init(&auto_number);
1039 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1040 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1045 /************************************************************************/
1046 /*********** formatteriterator ******************************************/
1047 /************************************************************************/
1049 /* This is used to implement string.Formatter.vparse(). It exists so
1050 Formatter can share code with the built in unicode.format() method.
1051 It's really just a wrapper around MarkupIterator that is callable
1052 from Python. */
1054 typedef struct {
1055 PyObject_HEAD
1057 STRINGLIB_OBJECT *str;
1059 MarkupIterator it_markup;
1060 } formatteriterobject;
1062 static void
1063 formatteriter_dealloc(formatteriterobject *it)
1065 Py_XDECREF(it->str);
1066 PyObject_FREE(it);
1069 /* returns a tuple:
1070 (literal, field_name, format_spec, conversion)
1072 literal is any literal text to output. might be zero length
1073 field_name is the string before the ':'. might be None
1074 format_spec is the string after the ':'. mibht be None
1075 conversion is either None, or the string after the '!'
1077 static PyObject *
1078 formatteriter_next(formatteriterobject *it)
1080 SubString literal;
1081 SubString field_name;
1082 SubString format_spec;
1083 STRINGLIB_CHAR conversion;
1084 int format_spec_needs_expanding;
1085 int field_present;
1086 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1087 &field_name, &format_spec, &conversion,
1088 &format_spec_needs_expanding);
1090 /* all of the SubString objects point into it->str, so no
1091 memory management needs to be done on them */
1092 assert(0 <= result && result <= 2);
1093 if (result == 0 || result == 1)
1094 /* if 0, error has already been set, if 1, iterator is empty */
1095 return NULL;
1096 else {
1097 PyObject *literal_str = NULL;
1098 PyObject *field_name_str = NULL;
1099 PyObject *format_spec_str = NULL;
1100 PyObject *conversion_str = NULL;
1101 PyObject *tuple = NULL;
1103 literal_str = SubString_new_object(&literal);
1104 if (literal_str == NULL)
1105 goto done;
1107 field_name_str = SubString_new_object(&field_name);
1108 if (field_name_str == NULL)
1109 goto done;
1111 /* if field_name is non-zero length, return a string for
1112 format_spec (even if zero length), else return None */
1113 format_spec_str = (field_present ?
1114 SubString_new_object_or_empty :
1115 SubString_new_object)(&format_spec);
1116 if (format_spec_str == NULL)
1117 goto done;
1119 /* if the conversion is not specified, return a None,
1120 otherwise create a one length string with the conversion
1121 character */
1122 if (conversion == '\0') {
1123 conversion_str = Py_None;
1124 Py_INCREF(conversion_str);
1126 else
1127 conversion_str = STRINGLIB_NEW(&conversion, 1);
1128 if (conversion_str == NULL)
1129 goto done;
1131 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1132 conversion_str);
1133 done:
1134 Py_XDECREF(literal_str);
1135 Py_XDECREF(field_name_str);
1136 Py_XDECREF(format_spec_str);
1137 Py_XDECREF(conversion_str);
1138 return tuple;
1142 static PyMethodDef formatteriter_methods[] = {
1143 {NULL, NULL} /* sentinel */
1146 static PyTypeObject PyFormatterIter_Type = {
1147 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1148 "formatteriterator", /* tp_name */
1149 sizeof(formatteriterobject), /* tp_basicsize */
1150 0, /* tp_itemsize */
1151 /* methods */
1152 (destructor)formatteriter_dealloc, /* tp_dealloc */
1153 0, /* tp_print */
1154 0, /* tp_getattr */
1155 0, /* tp_setattr */
1156 0, /* tp_compare */
1157 0, /* tp_repr */
1158 0, /* tp_as_number */
1159 0, /* tp_as_sequence */
1160 0, /* tp_as_mapping */
1161 0, /* tp_hash */
1162 0, /* tp_call */
1163 0, /* tp_str */
1164 PyObject_GenericGetAttr, /* tp_getattro */
1165 0, /* tp_setattro */
1166 0, /* tp_as_buffer */
1167 Py_TPFLAGS_DEFAULT, /* tp_flags */
1168 0, /* tp_doc */
1169 0, /* tp_traverse */
1170 0, /* tp_clear */
1171 0, /* tp_richcompare */
1172 0, /* tp_weaklistoffset */
1173 PyObject_SelfIter, /* tp_iter */
1174 (iternextfunc)formatteriter_next, /* tp_iternext */
1175 formatteriter_methods, /* tp_methods */
1179 /* unicode_formatter_parser is used to implement
1180 string.Formatter.vformat. it parses a string and returns tuples
1181 describing the parsed elements. It's a wrapper around
1182 stringlib/string_format.h's MarkupIterator */
1183 static PyObject *
1184 formatter_parser(STRINGLIB_OBJECT *self)
1186 formatteriterobject *it;
1188 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1189 if (it == NULL)
1190 return NULL;
1192 /* take ownership, give the object to the iterator */
1193 Py_INCREF(self);
1194 it->str = self;
1196 /* initialize the contained MarkupIterator */
1197 MarkupIterator_init(&it->it_markup,
1198 STRINGLIB_STR(self),
1199 STRINGLIB_LEN(self));
1201 return (PyObject *)it;
1205 /************************************************************************/
1206 /*********** fieldnameiterator ******************************************/
1207 /************************************************************************/
1210 /* This is used to implement string.Formatter.vparse(). It parses the
1211 field name into attribute and item values. It's a Python-callable
1212 wrapper around FieldNameIterator */
/* Python object that wraps a FieldNameIterator so it can be iterated
   from Python code. */
typedef struct {
    PyObject_HEAD

    /* the field-name string object; a reference is taken when the
       iterator is created and dropped in fieldnameiter_dealloc() */
    STRINGLIB_OBJECT *str;

    /* iteration state, advanced by fieldnameiter_next() */
    FieldNameIterator it_field;
} fieldnameiterobject;
1222 static void
1223 fieldnameiter_dealloc(fieldnameiterobject *it)
1225 Py_XDECREF(it->str);
1226 PyObject_FREE(it);
1229 /* returns a tuple:
1230 (is_attr, value)
1231 is_attr is true if we used attribute syntax (e.g., '.foo')
1232 false if we used index syntax (e.g., '[foo]')
1233 value is an integer or string
1235 static PyObject *
1236 fieldnameiter_next(fieldnameiterobject *it)
1238 int result;
1239 int is_attr;
1240 Py_ssize_t idx;
1241 SubString name;
1243 result = FieldNameIterator_next(&it->it_field, &is_attr,
1244 &idx, &name);
1245 if (result == 0 || result == 1)
1246 /* if 0, error has already been set, if 1, iterator is empty */
1247 return NULL;
1248 else {
1249 PyObject* result = NULL;
1250 PyObject* is_attr_obj = NULL;
1251 PyObject* obj = NULL;
1253 is_attr_obj = PyBool_FromLong(is_attr);
1254 if (is_attr_obj == NULL)
1255 goto done;
1257 /* either an integer or a string */
1258 if (idx != -1)
1259 obj = PyLong_FromSsize_t(idx);
1260 else
1261 obj = SubString_new_object(&name);
1262 if (obj == NULL)
1263 goto done;
1265 /* return a tuple of values */
1266 result = PyTuple_Pack(2, is_attr_obj, obj);
1268 done:
1269 Py_XDECREF(is_attr_obj);
1270 Py_XDECREF(obj);
1271 return result;
1275 static PyMethodDef fieldnameiter_methods[] = {
1276 {NULL, NULL} /* sentinel */
1279 static PyTypeObject PyFieldNameIter_Type = {
1280 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1281 "fieldnameiterator", /* tp_name */
1282 sizeof(fieldnameiterobject), /* tp_basicsize */
1283 0, /* tp_itemsize */
1284 /* methods */
1285 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1286 0, /* tp_print */
1287 0, /* tp_getattr */
1288 0, /* tp_setattr */
1289 0, /* tp_compare */
1290 0, /* tp_repr */
1291 0, /* tp_as_number */
1292 0, /* tp_as_sequence */
1293 0, /* tp_as_mapping */
1294 0, /* tp_hash */
1295 0, /* tp_call */
1296 0, /* tp_str */
1297 PyObject_GenericGetAttr, /* tp_getattro */
1298 0, /* tp_setattro */
1299 0, /* tp_as_buffer */
1300 Py_TPFLAGS_DEFAULT, /* tp_flags */
1301 0, /* tp_doc */
1302 0, /* tp_traverse */
1303 0, /* tp_clear */
1304 0, /* tp_richcompare */
1305 0, /* tp_weaklistoffset */
1306 PyObject_SelfIter, /* tp_iter */
1307 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1308 fieldnameiter_methods, /* tp_methods */
1311 /* unicode_formatter_field_name_split is used to implement
1312 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1313 returns a tuple of (first, rest): "first", the part before the
1314 first '.' or '['; and "rest", an iterator for the rest of the field
1315 name. it's a wrapper around stringlib/string_format.h's
1316 field_name_split. The iterator it returns is a
1317 FieldNameIterator */
1318 static PyObject *
1319 formatter_field_name_split(STRINGLIB_OBJECT *self)
1321 SubString first;
1322 Py_ssize_t first_idx;
1323 fieldnameiterobject *it;
1325 PyObject *first_obj = NULL;
1326 PyObject *result = NULL;
1328 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1329 if (it == NULL)
1330 return NULL;
1332 /* take ownership, give the object to the iterator. this is
1333 just to keep the field_name alive */
1334 Py_INCREF(self);
1335 it->str = self;
1337 /* Pass in auto_number = NULL. We'll return an empty string for
1338 first_obj in that case. */
1339 if (!field_name_split(STRINGLIB_STR(self),
1340 STRINGLIB_LEN(self),
1341 &first, &first_idx, &it->it_field, NULL))
1342 goto done;
1344 /* first becomes an integer, if possible; else a string */
1345 if (first_idx != -1)
1346 first_obj = PyLong_FromSsize_t(first_idx);
1347 else
1348 /* convert "first" into a string object */
1349 first_obj = SubString_new_object(&first);
1350 if (first_obj == NULL)
1351 goto done;
1353 /* return a tuple of values */
1354 result = PyTuple_Pack(2, first_obj, it);
1356 done:
1357 Py_XDECREF(it);
1358 Py_XDECREF(first_obj);
1359 return result;