Updated to reflect change in logging.config to remove out-of-date comment in _install...
[python.git] / Objects / stringlib / string_format.h
blobbe8e8080851697ebf8259e72593f592992ebef15
1 /*
2 string_format.h -- implementation of string.format().
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6 */
/* Defines for Python 2.6 compatibility: before 3.0 the public name
   PyLong_FromSsize_t is mapped onto _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
/* Defines for more efficiently reallocating the string buffer.
   An OutputString starts with a slack of INITIAL_SIZE_INCREMENT
   characters; each reallocation multiplies that slack by
   SIZE_MULTIPLIER until it is no longer below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
20 /************************************************************************/
21 /*********** Global data structures and forward declarations *********/
22 /************************************************************************/
25 A SubString consists of the characters between two string or
26 unicode pointers.
28 typedef struct {
29 STRINGLIB_CHAR *ptr;
30 STRINGLIB_CHAR *end;
31 } SubString;
34 /* forward declaration for recursion */
35 static PyObject *
36 build_string(SubString *input, PyObject *args, PyObject *kwargs,
37 int recursion_depth);
41 /************************************************************************/
42 /************************** Utility functions ************************/
43 /************************************************************************/
45 /* fill in a SubString from a pointer and length */
46 Py_LOCAL_INLINE(void)
47 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
49 str->ptr = p;
50 if (p == NULL)
51 str->end = NULL;
52 else
53 str->end = str->ptr + len;
56 /* return a new string. if str->ptr is NULL, return None */
57 Py_LOCAL_INLINE(PyObject *)
58 SubString_new_object(SubString *str)
60 if (str->ptr == NULL) {
61 Py_INCREF(Py_None);
62 return Py_None;
64 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
67 /* return a new string. if str->ptr is NULL, return None */
68 Py_LOCAL_INLINE(PyObject *)
69 SubString_new_object_or_empty(SubString *str)
71 if (str->ptr == NULL) {
72 return STRINGLIB_NEW(NULL, 0);
74 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
77 /************************************************************************/
78 /*********** Output string management functions ****************/
79 /************************************************************************/
81 typedef struct {
82 STRINGLIB_CHAR *ptr;
83 STRINGLIB_CHAR *end;
84 PyObject *obj;
85 Py_ssize_t size_increment;
86 } OutputString;
88 /* initialize an OutputString object, reserving size characters */
89 static int
90 output_initialize(OutputString *output, Py_ssize_t size)
92 output->obj = STRINGLIB_NEW(NULL, size);
93 if (output->obj == NULL)
94 return 0;
96 output->ptr = STRINGLIB_STR(output->obj);
97 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
98 output->size_increment = INITIAL_SIZE_INCREMENT;
100 return 1;
104 output_extend reallocates the output string buffer.
105 It returns a status: 0 for a failed reallocation,
106 1 for success.
109 static int
110 output_extend(OutputString *output, Py_ssize_t count)
112 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
113 Py_ssize_t curlen = output->ptr - startptr;
114 Py_ssize_t maxlen = curlen + count + output->size_increment;
116 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
117 return 0;
118 startptr = STRINGLIB_STR(output->obj);
119 output->ptr = startptr + curlen;
120 output->end = startptr + maxlen;
121 if (output->size_increment < MAX_SIZE_INCREMENT)
122 output->size_increment *= SIZE_MULTIPLIER;
123 return 1;
127 output_data dumps characters into our output string
128 buffer.
130 In some cases, it has to reallocate the string.
132 It returns a status: 0 for a failed reallocation,
133 1 for success.
135 static int
136 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
138 if ((count > output->end - output->ptr) && !output_extend(output, count))
139 return 0;
140 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
141 output->ptr += count;
142 return 1;
145 /************************************************************************/
146 /*********** Format string parsing -- integers and identifiers *********/
147 /************************************************************************/
149 static Py_ssize_t
150 get_integer(const SubString *str)
152 Py_ssize_t accumulator = 0;
153 Py_ssize_t digitval;
154 Py_ssize_t oldaccumulator;
155 STRINGLIB_CHAR *p;
157 /* empty string is an error */
158 if (str->ptr >= str->end)
159 return -1;
161 for (p = str->ptr; p < str->end; p++) {
162 digitval = STRINGLIB_TODECIMAL(*p);
163 if (digitval < 0)
164 return -1;
166 This trick was copied from old Unicode format code. It's cute,
167 but would really suck on an old machine with a slow divide
168 implementation. Fortunately, in the normal case we do not
169 expect too many digits.
171 oldaccumulator = accumulator;
172 accumulator *= 10;
173 if ((accumulator+10)/10 != oldaccumulator+1) {
174 PyErr_Format(PyExc_ValueError,
175 "Too many decimal digits in format string");
176 return -1;
178 accumulator += digitval;
180 return accumulator;
183 /************************************************************************/
184 /******** Functions to get field objects and specification strings ******/
185 /************************************************************************/
187 /* do the equivalent of obj.name */
188 static PyObject *
189 getattr(PyObject *obj, SubString *name)
191 PyObject *newobj;
192 PyObject *str = SubString_new_object(name);
193 if (str == NULL)
194 return NULL;
195 newobj = PyObject_GetAttr(obj, str);
196 Py_DECREF(str);
197 return newobj;
200 /* do the equivalent of obj[idx], where obj is a sequence */
201 static PyObject *
202 getitem_sequence(PyObject *obj, Py_ssize_t idx)
204 return PySequence_GetItem(obj, idx);
207 /* do the equivalent of obj[idx], where obj is not a sequence */
208 static PyObject *
209 getitem_idx(PyObject *obj, Py_ssize_t idx)
211 PyObject *newobj;
212 PyObject *idx_obj = PyLong_FromSsize_t(idx);
213 if (idx_obj == NULL)
214 return NULL;
215 newobj = PyObject_GetItem(obj, idx_obj);
216 Py_DECREF(idx_obj);
217 return newobj;
220 /* do the equivalent of obj[name] */
221 static PyObject *
222 getitem_str(PyObject *obj, SubString *name)
224 PyObject *newobj;
225 PyObject *str = SubString_new_object(name);
226 if (str == NULL)
227 return NULL;
228 newobj = PyObject_GetItem(obj, str);
229 Py_DECREF(str);
230 return newobj;
233 typedef struct {
234 /* the entire string we're parsing. we assume that someone else
235 is managing its lifetime, and that it will exist for the
236 lifetime of the iterator. can be empty */
237 SubString str;
239 /* pointer to where we are inside field_name */
240 STRINGLIB_CHAR *ptr;
241 } FieldNameIterator;
244 static int
245 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
246 Py_ssize_t len)
248 SubString_init(&self->str, ptr, len);
249 self->ptr = self->str.ptr;
250 return 1;
253 static int
254 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
256 STRINGLIB_CHAR c;
258 name->ptr = self->ptr;
260 /* return everything until '.' or '[' */
261 while (self->ptr < self->str.end) {
262 switch (c = *self->ptr++) {
263 case '[':
264 case '.':
265 /* backup so that we this character will be seen next time */
266 self->ptr--;
267 break;
268 default:
269 continue;
271 break;
273 /* end of string is okay */
274 name->end = self->ptr;
275 return 1;
278 static int
279 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
281 int bracket_seen = 0;
282 STRINGLIB_CHAR c;
284 name->ptr = self->ptr;
286 /* return everything until ']' */
287 while (self->ptr < self->str.end) {
288 switch (c = *self->ptr++) {
289 case ']':
290 bracket_seen = 1;
291 break;
292 default:
293 continue;
295 break;
297 /* make sure we ended with a ']' */
298 if (!bracket_seen) {
299 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
300 return 0;
303 /* end of string is okay */
304 /* don't include the ']' */
305 name->end = self->ptr-1;
306 return 1;
309 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
310 static int
311 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
312 Py_ssize_t *name_idx, SubString *name)
314 /* check at end of input */
315 if (self->ptr >= self->str.end)
316 return 1;
318 switch (*self->ptr++) {
319 case '.':
320 *is_attribute = 1;
321 if (_FieldNameIterator_attr(self, name) == 0)
322 return 0;
323 *name_idx = -1;
324 break;
325 case '[':
326 *is_attribute = 0;
327 if (_FieldNameIterator_item(self, name) == 0)
328 return 0;
329 *name_idx = get_integer(name);
330 break;
331 default:
332 /* interal error, can't get here */
333 assert(0);
334 return 0;
337 /* empty string is an error */
338 if (name->ptr == name->end) {
339 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
340 return 0;
343 return 2;
347 /* input: field_name
348 output: 'first' points to the part before the first '[' or '.'
349 'first_idx' is -1 if 'first' is not an integer, otherwise
350 it's the value of first converted to an integer
351 'rest' is an iterator to return the rest
353 static int
354 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
355 Py_ssize_t *first_idx, FieldNameIterator *rest)
357 STRINGLIB_CHAR c;
358 STRINGLIB_CHAR *p = ptr;
359 STRINGLIB_CHAR *end = ptr + len;
361 /* find the part up until the first '.' or '[' */
362 while (p < end) {
363 switch (c = *p++) {
364 case '[':
365 case '.':
366 /* backup so that we this character is available to the
367 "rest" iterator */
368 p--;
369 break;
370 default:
371 continue;
373 break;
376 /* set up the return values */
377 SubString_init(first, ptr, p - ptr);
378 FieldNameIterator_init(rest, p, end - p);
380 /* see if "first" is an integer, in which case it's used as an index */
381 *first_idx = get_integer(first);
383 /* zero length string is an error */
384 if (first->ptr >= first->end) {
385 PyErr_SetString(PyExc_ValueError, "empty field name");
386 goto error;
389 return 1;
390 error:
391 return 0;
396 get_field_object returns the object inside {}, before the
397 format_spec. It handles getindex and getattr lookups and consumes
398 the entire input string.
400 static PyObject *
401 get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
403 PyObject *obj = NULL;
404 int ok;
405 int is_attribute;
406 SubString name;
407 SubString first;
408 Py_ssize_t index;
409 FieldNameIterator rest;
411 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
412 &index, &rest)) {
413 goto error;
416 if (index == -1) {
417 /* look up in kwargs */
418 PyObject *key = SubString_new_object(&first);
419 if (key == NULL)
420 goto error;
421 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
422 PyErr_SetObject(PyExc_KeyError, key);
423 Py_DECREF(key);
424 goto error;
426 Py_DECREF(key);
427 Py_INCREF(obj);
429 else {
430 /* look up in args */
431 obj = PySequence_GetItem(args, index);
432 if (obj == NULL)
433 goto error;
436 /* iterate over the rest of the field_name */
437 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
438 &name)) == 2) {
439 PyObject *tmp;
441 if (is_attribute)
442 /* getattr lookup "." */
443 tmp = getattr(obj, &name);
444 else
445 /* getitem lookup "[]" */
446 if (index == -1)
447 tmp = getitem_str(obj, &name);
448 else
449 if (PySequence_Check(obj))
450 tmp = getitem_sequence(obj, index);
451 else
452 /* not a sequence */
453 tmp = getitem_idx(obj, index);
454 if (tmp == NULL)
455 goto error;
457 /* assign to obj */
458 Py_DECREF(obj);
459 obj = tmp;
461 /* end of iterator, this is the non-error case */
462 if (ok == 1)
463 return obj;
464 error:
465 Py_XDECREF(obj);
466 return NULL;
469 /************************************************************************/
470 /***************** Field rendering functions **************************/
471 /************************************************************************/
474 render_field() is the main function in this section. It takes the
475 field object and field specification string generated by
476 get_field_and_spec, and renders the field into the output string.
478 render_field calls fieldobj.__format__(format_spec) method, and
479 appends to the output.
481 static int
482 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
484 int ok = 0;
485 PyObject *result = NULL;
487 /* we need to create an object out of the pointers we have */
488 PyObject *format_spec_object = SubString_new_object_or_empty(format_spec);
489 if (format_spec_object == NULL)
490 goto done;
492 result = PyObject_Format(fieldobj, format_spec_object);
493 if (result == NULL)
494 goto done;
496 #if PY_VERSION_HEX >= 0x03000000
497 assert(PyUnicode_Check(result));
498 #else
499 assert(PyString_Check(result) || PyUnicode_Check(result));
501 /* Convert result to our type. We could be str, and result could
502 be unicode */
504 PyObject *tmp = STRINGLIB_TOSTR(result);
505 if (tmp == NULL)
506 goto done;
507 Py_DECREF(result);
508 result = tmp;
510 #endif
512 ok = output_data(output,
513 STRINGLIB_STR(result), STRINGLIB_LEN(result));
514 done:
515 Py_DECREF(format_spec_object);
516 Py_XDECREF(result);
517 return ok;
520 static int
521 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
522 STRINGLIB_CHAR *conversion)
524 STRINGLIB_CHAR c = 0;
526 /* initialize these, as they may be empty */
527 *conversion = '\0';
528 SubString_init(format_spec, NULL, 0);
530 /* search for the field name. it's terminated by the end of the
531 string, or a ':' or '!' */
532 field_name->ptr = str->ptr;
533 while (str->ptr < str->end) {
534 switch (c = *(str->ptr++)) {
535 case ':':
536 case '!':
537 break;
538 default:
539 continue;
541 break;
544 if (c == '!' || c == ':') {
545 /* we have a format specifier and/or a conversion */
546 /* don't include the last character */
547 field_name->end = str->ptr-1;
549 /* the format specifier is the rest of the string */
550 format_spec->ptr = str->ptr;
551 format_spec->end = str->end;
553 /* see if there's a conversion specifier */
554 if (c == '!') {
555 /* there must be another character present */
556 if (format_spec->ptr >= format_spec->end) {
557 PyErr_SetString(PyExc_ValueError,
558 "end of format while looking for conversion "
559 "specifier");
560 return 0;
562 *conversion = *(format_spec->ptr++);
564 /* if there is another character, it must be a colon */
565 if (format_spec->ptr < format_spec->end) {
566 c = *(format_spec->ptr++);
567 if (c != ':') {
568 PyErr_SetString(PyExc_ValueError,
569 "expected ':' after format specifier");
570 return 0;
575 return 1;
578 else {
579 /* end of string, there's no format_spec or conversion */
580 field_name->end = str->ptr;
581 return 1;
585 /************************************************************************/
586 /******* Output string allocation and escape-to-markup processing ******/
587 /************************************************************************/
589 /* MarkupIterator breaks the string into pieces of either literal
590 text, or things inside {} that need to be marked up. it is
591 designed to make it easy to wrap a Python iterator around it, for
592 use with the Formatter class */
594 typedef struct {
595 SubString str;
596 } MarkupIterator;
598 static int
599 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
601 SubString_init(&self->str, ptr, len);
602 return 1;
605 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
606 string (or something to be expanded) */
607 static int
608 MarkupIterator_next(MarkupIterator *self, SubString *literal,
609 SubString *field_name, SubString *format_spec,
610 STRINGLIB_CHAR *conversion,
611 int *format_spec_needs_expanding)
613 int at_end;
614 STRINGLIB_CHAR c = 0;
615 STRINGLIB_CHAR *start;
616 int count;
617 Py_ssize_t len;
618 int markup_follows = 0;
620 /* initialize all of the output variables */
621 SubString_init(literal, NULL, 0);
622 SubString_init(field_name, NULL, 0);
623 SubString_init(format_spec, NULL, 0);
624 *conversion = '\0';
625 *format_spec_needs_expanding = 0;
627 /* No more input, end of iterator. This is the normal exit
628 path. */
629 if (self->str.ptr >= self->str.end)
630 return 1;
632 start = self->str.ptr;
634 /* First read any literal text. Read until the end of string, an
635 escaped '{' or '}', or an unescaped '{'. In order to never
636 allocate memory and so I can just pass pointers around, if
637 there's an escaped '{' or '}' then we'll return the literal
638 including the brace, but no format object. The next time
639 through, we'll return the rest of the literal, skipping past
640 the second consecutive brace. */
641 while (self->str.ptr < self->str.end) {
642 switch (c = *(self->str.ptr++)) {
643 case '{':
644 case '}':
645 markup_follows = 1;
646 break;
647 default:
648 continue;
650 break;
653 at_end = self->str.ptr >= self->str.end;
654 len = self->str.ptr - start;
656 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
657 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
658 "in format string");
659 return 0;
661 if (at_end && c == '{') {
662 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
663 "in format string");
664 return 0;
666 if (!at_end) {
667 if (c == *self->str.ptr) {
668 /* escaped } or {, skip it in the input. there is no
669 markup object following us, just this literal text */
670 self->str.ptr++;
671 markup_follows = 0;
673 else
674 len--;
677 /* record the literal text */
678 literal->ptr = start;
679 literal->end = start + len;
681 if (!markup_follows)
682 return 2;
684 /* this is markup, find the end of the string by counting nested
685 braces. note that this prohibits escaped braces, so that
686 format_specs cannot have braces in them. */
687 count = 1;
689 start = self->str.ptr;
691 /* we know we can't have a zero length string, so don't worry
692 about that case */
693 while (self->str.ptr < self->str.end) {
694 switch (c = *(self->str.ptr++)) {
695 case '{':
696 /* the format spec needs to be recursively expanded.
697 this is an optimization, and not strictly needed */
698 *format_spec_needs_expanding = 1;
699 count++;
700 break;
701 case '}':
702 count--;
703 if (count <= 0) {
704 /* we're done. parse and get out */
705 SubString s;
707 SubString_init(&s, start, self->str.ptr - 1 - start);
708 if (parse_field(&s, field_name, format_spec, conversion) == 0)
709 return 0;
711 /* a zero length field_name is an error */
712 if (field_name->ptr == field_name->end) {
713 PyErr_SetString(PyExc_ValueError, "zero length field name "
714 "in format");
715 return 0;
718 /* success */
719 return 2;
721 break;
725 /* end of string while searching for matching '}' */
726 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
727 return 0;
731 /* do the !r or !s conversion on obj */
732 static PyObject *
733 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
735 /* XXX in pre-3.0, do we need to convert this to unicode, since it
736 might have returned a string? */
737 switch (conversion) {
738 case 'r':
739 return PyObject_Repr(obj);
740 case 's':
741 return STRINGLIB_TOSTR(obj);
742 default:
743 if (conversion > 32 && conversion < 127) {
744 /* It's the ASCII subrange; casting to char is safe
745 (assuming the execution character set is an ASCII
746 superset). */
747 PyErr_Format(PyExc_ValueError,
748 "Unknown conversion specifier %c",
749 (char)conversion);
750 } else
751 PyErr_Format(PyExc_ValueError,
752 "Unknown conversion specifier \\x%x",
753 (unsigned int)conversion);
754 return NULL;
758 /* given:
760 {field_name!conversion:format_spec}
762 compute the result and write it to output.
763 format_spec_needs_expanding is an optimization. if it's false,
764 just output the string directly, otherwise recursively expand the
765 format_spec string. */
767 static int
768 output_markup(SubString *field_name, SubString *format_spec,
769 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
770 OutputString *output, PyObject *args, PyObject *kwargs,
771 int recursion_depth)
773 PyObject *tmp = NULL;
774 PyObject *fieldobj = NULL;
775 SubString expanded_format_spec;
776 SubString *actual_format_spec;
777 int result = 0;
779 /* convert field_name to an object */
780 fieldobj = get_field_object(field_name, args, kwargs);
781 if (fieldobj == NULL)
782 goto done;
784 if (conversion != '\0') {
785 tmp = do_conversion(fieldobj, conversion);
786 if (tmp == NULL)
787 goto done;
789 /* do the assignment, transferring ownership: fieldobj = tmp */
790 Py_DECREF(fieldobj);
791 fieldobj = tmp;
792 tmp = NULL;
795 /* if needed, recurively compute the format_spec */
796 if (format_spec_needs_expanding) {
797 tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
798 if (tmp == NULL)
799 goto done;
801 /* note that in the case we're expanding the format string,
802 tmp must be kept around until after the call to
803 render_field. */
804 SubString_init(&expanded_format_spec,
805 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
806 actual_format_spec = &expanded_format_spec;
808 else
809 actual_format_spec = format_spec;
811 if (render_field(fieldobj, actual_format_spec, output) == 0)
812 goto done;
814 result = 1;
816 done:
817 Py_XDECREF(fieldobj);
818 Py_XDECREF(tmp);
820 return result;
824 do_markup is the top-level loop for the format() method. It
825 searches through the format string for escapes to markup codes, and
826 calls other functions to move non-markup text to the output,
827 and to perform the markup to the output.
829 static int
830 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
831 OutputString *output, int recursion_depth)
833 MarkupIterator iter;
834 int format_spec_needs_expanding;
835 int result;
836 SubString literal;
837 SubString field_name;
838 SubString format_spec;
839 STRINGLIB_CHAR conversion;
841 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
842 while ((result = MarkupIterator_next(&iter, &literal, &field_name,
843 &format_spec, &conversion,
844 &format_spec_needs_expanding)) == 2) {
845 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
846 return 0;
847 if (field_name.ptr != field_name.end)
848 if (!output_markup(&field_name, &format_spec,
849 format_spec_needs_expanding, conversion, output,
850 args, kwargs, recursion_depth))
851 return 0;
853 return result;
858 build_string allocates the output string and then
859 calls do_markup to do the heavy lifting.
861 static PyObject *
862 build_string(SubString *input, PyObject *args, PyObject *kwargs,
863 int recursion_depth)
865 OutputString output;
866 PyObject *result = NULL;
867 Py_ssize_t count;
869 output.obj = NULL; /* needed so cleanup code always works */
871 /* check the recursion level */
872 if (recursion_depth <= 0) {
873 PyErr_SetString(PyExc_ValueError,
874 "Max string recursion exceeded");
875 goto done;
878 /* initial size is the length of the format string, plus the size
879 increment. seems like a reasonable default */
880 if (!output_initialize(&output,
881 input->end - input->ptr +
882 INITIAL_SIZE_INCREMENT))
883 goto done;
885 if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
886 goto done;
889 count = output.ptr - STRINGLIB_STR(output.obj);
890 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
891 goto done;
894 /* transfer ownership to result */
895 result = output.obj;
896 output.obj = NULL;
898 done:
899 Py_XDECREF(output.obj);
900 return result;
903 /************************************************************************/
904 /*********** main routine ***********************************************/
905 /************************************************************************/
907 /* this is the main entry point */
908 static PyObject *
909 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
911 SubString input;
913 /* PEP 3101 says only 2 levels, so that
914 "{0:{1}}".format('abc', 's') # works
915 "{0:{1:{2}}}".format('abc', 's', '') # fails
917 int recursion_depth = 2;
919 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
920 return build_string(&input, args, kwargs, recursion_depth);
925 /************************************************************************/
926 /*********** formatteriterator ******************************************/
927 /************************************************************************/
929 /* This is used to implement string.Formatter.vparse(). It exists so
930 Formatter can share code with the built in unicode.format() method.
931 It's really just a wrapper around MarkupIterator that is callable
932 from Python. */
934 typedef struct {
935 PyObject_HEAD
937 STRINGLIB_OBJECT *str;
939 MarkupIterator it_markup;
940 } formatteriterobject;
942 static void
943 formatteriter_dealloc(formatteriterobject *it)
945 Py_XDECREF(it->str);
946 PyObject_FREE(it);
949 /* returns a tuple:
950 (literal, field_name, format_spec, conversion)
952 literal is any literal text to output. might be zero length
953 field_name is the string before the ':'. might be None
954 format_spec is the string after the ':'. mibht be None
955 conversion is either None, or the string after the '!'
957 static PyObject *
958 formatteriter_next(formatteriterobject *it)
960 SubString literal;
961 SubString field_name;
962 SubString format_spec;
963 STRINGLIB_CHAR conversion;
964 int format_spec_needs_expanding;
965 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
966 &format_spec, &conversion,
967 &format_spec_needs_expanding);
969 /* all of the SubString objects point into it->str, so no
970 memory management needs to be done on them */
971 assert(0 <= result && result <= 2);
972 if (result == 0 || result == 1)
973 /* if 0, error has already been set, if 1, iterator is empty */
974 return NULL;
975 else {
976 PyObject *literal_str = NULL;
977 PyObject *field_name_str = NULL;
978 PyObject *format_spec_str = NULL;
979 PyObject *conversion_str = NULL;
980 PyObject *tuple = NULL;
981 int has_field = field_name.ptr != field_name.end;
983 literal_str = SubString_new_object(&literal);
984 if (literal_str == NULL)
985 goto done;
987 field_name_str = SubString_new_object(&field_name);
988 if (field_name_str == NULL)
989 goto done;
991 /* if field_name is non-zero length, return a string for
992 format_spec (even if zero length), else return None */
993 format_spec_str = (has_field ?
994 SubString_new_object_or_empty :
995 SubString_new_object)(&format_spec);
996 if (format_spec_str == NULL)
997 goto done;
999 /* if the conversion is not specified, return a None,
1000 otherwise create a one length string with the conversion
1001 character */
1002 if (conversion == '\0') {
1003 conversion_str = Py_None;
1004 Py_INCREF(conversion_str);
1006 else
1007 conversion_str = STRINGLIB_NEW(&conversion, 1);
1008 if (conversion_str == NULL)
1009 goto done;
1011 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1012 conversion_str);
1013 done:
1014 Py_XDECREF(literal_str);
1015 Py_XDECREF(field_name_str);
1016 Py_XDECREF(format_spec_str);
1017 Py_XDECREF(conversion_str);
1018 return tuple;
1022 static PyMethodDef formatteriter_methods[] = {
1023 {NULL, NULL} /* sentinel */
1026 static PyTypeObject PyFormatterIter_Type = {
1027 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1028 "formatteriterator", /* tp_name */
1029 sizeof(formatteriterobject), /* tp_basicsize */
1030 0, /* tp_itemsize */
1031 /* methods */
1032 (destructor)formatteriter_dealloc, /* tp_dealloc */
1033 0, /* tp_print */
1034 0, /* tp_getattr */
1035 0, /* tp_setattr */
1036 0, /* tp_compare */
1037 0, /* tp_repr */
1038 0, /* tp_as_number */
1039 0, /* tp_as_sequence */
1040 0, /* tp_as_mapping */
1041 0, /* tp_hash */
1042 0, /* tp_call */
1043 0, /* tp_str */
1044 PyObject_GenericGetAttr, /* tp_getattro */
1045 0, /* tp_setattro */
1046 0, /* tp_as_buffer */
1047 Py_TPFLAGS_DEFAULT, /* tp_flags */
1048 0, /* tp_doc */
1049 0, /* tp_traverse */
1050 0, /* tp_clear */
1051 0, /* tp_richcompare */
1052 0, /* tp_weaklistoffset */
1053 PyObject_SelfIter, /* tp_iter */
1054 (iternextfunc)formatteriter_next, /* tp_iternext */
1055 formatteriter_methods, /* tp_methods */
1059 /* unicode_formatter_parser is used to implement
1060 string.Formatter.vformat. it parses a string and returns tuples
1061 describing the parsed elements. It's a wrapper around
1062 stringlib/string_format.h's MarkupIterator */
1063 static PyObject *
1064 formatter_parser(STRINGLIB_OBJECT *self)
1066 formatteriterobject *it;
1068 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1069 if (it == NULL)
1070 return NULL;
1072 /* take ownership, give the object to the iterator */
1073 Py_INCREF(self);
1074 it->str = self;
1076 /* initialize the contained MarkupIterator */
1077 MarkupIterator_init(&it->it_markup,
1078 STRINGLIB_STR(self),
1079 STRINGLIB_LEN(self));
1081 return (PyObject *)it;
1085 /************************************************************************/
1086 /*********** fieldnameiterator ******************************************/
1087 /************************************************************************/
1090 /* This is used to implement string.Formatter.vparse(). It parses the
1091 field name into attribute and item values. It's a Python-callable
1092 wrapper around FieldNameIterator */
1094 typedef struct {
1095 PyObject_HEAD
1097 STRINGLIB_OBJECT *str;
1099 FieldNameIterator it_field;
1100 } fieldnameiterobject;
1102 static void
1103 fieldnameiter_dealloc(fieldnameiterobject *it)
1105 Py_XDECREF(it->str);
1106 PyObject_FREE(it);
1109 /* returns a tuple:
1110 (is_attr, value)
1111 is_attr is true if we used attribute syntax (e.g., '.foo')
1112 false if we used index syntax (e.g., '[foo]')
1113 value is an integer or string
1115 static PyObject *
1116 fieldnameiter_next(fieldnameiterobject *it)
1118 int result;
1119 int is_attr;
1120 Py_ssize_t idx;
1121 SubString name;
1123 result = FieldNameIterator_next(&it->it_field, &is_attr,
1124 &idx, &name);
1125 if (result == 0 || result == 1)
1126 /* if 0, error has already been set, if 1, iterator is empty */
1127 return NULL;
1128 else {
1129 PyObject* result = NULL;
1130 PyObject* is_attr_obj = NULL;
1131 PyObject* obj = NULL;
1133 is_attr_obj = PyBool_FromLong(is_attr);
1134 if (is_attr_obj == NULL)
1135 goto done;
1137 /* either an integer or a string */
1138 if (idx != -1)
1139 obj = PyLong_FromSsize_t(idx);
1140 else
1141 obj = SubString_new_object(&name);
1142 if (obj == NULL)
1143 goto done;
1145 /* return a tuple of values */
1146 result = PyTuple_Pack(2, is_attr_obj, obj);
1148 done:
1149 Py_XDECREF(is_attr_obj);
1150 Py_XDECREF(obj);
1151 return result;
1155 static PyMethodDef fieldnameiter_methods[] = {
1156 {NULL, NULL} /* sentinel */
1159 static PyTypeObject PyFieldNameIter_Type = {
1160 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1161 "fieldnameiterator", /* tp_name */
1162 sizeof(fieldnameiterobject), /* tp_basicsize */
1163 0, /* tp_itemsize */
1164 /* methods */
1165 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1166 0, /* tp_print */
1167 0, /* tp_getattr */
1168 0, /* tp_setattr */
1169 0, /* tp_compare */
1170 0, /* tp_repr */
1171 0, /* tp_as_number */
1172 0, /* tp_as_sequence */
1173 0, /* tp_as_mapping */
1174 0, /* tp_hash */
1175 0, /* tp_call */
1176 0, /* tp_str */
1177 PyObject_GenericGetAttr, /* tp_getattro */
1178 0, /* tp_setattro */
1179 0, /* tp_as_buffer */
1180 Py_TPFLAGS_DEFAULT, /* tp_flags */
1181 0, /* tp_doc */
1182 0, /* tp_traverse */
1183 0, /* tp_clear */
1184 0, /* tp_richcompare */
1185 0, /* tp_weaklistoffset */
1186 PyObject_SelfIter, /* tp_iter */
1187 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1188 fieldnameiter_methods, /* tp_methods */
1191 /* unicode_formatter_field_name_split is used to implement
1192 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1193 returns a tuple of (first, rest): "first", the part before the
1194 first '.' or '['; and "rest", an iterator for the rest of the field
1195 name. it's a wrapper around stringlib/string_format.h's
1196 field_name_split. The iterator it returns is a
1197 FieldNameIterator */
1198 static PyObject *
1199 formatter_field_name_split(STRINGLIB_OBJECT *self)
1201 SubString first;
1202 Py_ssize_t first_idx;
1203 fieldnameiterobject *it;
1205 PyObject *first_obj = NULL;
1206 PyObject *result = NULL;
1208 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1209 if (it == NULL)
1210 return NULL;
1212 /* take ownership, give the object to the iterator. this is
1213 just to keep the field_name alive */
1214 Py_INCREF(self);
1215 it->str = self;
1217 if (!field_name_split(STRINGLIB_STR(self),
1218 STRINGLIB_LEN(self),
1219 &first, &first_idx, &it->it_field))
1220 goto done;
1222 /* first becomes an integer, if possible; else a string */
1223 if (first_idx != -1)
1224 first_obj = PyLong_FromSsize_t(first_idx);
1225 else
1226 /* convert "first" into a string object */
1227 first_obj = SubString_new_object(&first);
1228 if (first_obj == NULL)
1229 goto done;
1231 /* return a tuple of values */
1232 result = PyTuple_Pack(2, first_obj, it);
1234 done:
1235 Py_XDECREF(it);
1236 Py_XDECREF(first_obj);
1237 return result;