Issue #7117, continued: Remove substitution of %g-style formatting for
[python.git] / Objects / stringlib / formatter.h
blobf09578fa13d7f5ac3fd6242c1a84bdfc74097008
1 /* implements the string, long, and float formatters. that is,
2 string.__format__, etc. */
4 #include <locale.h>
6 /* Before including this, you must include either:
7 stringlib/unicodedefs.h
8 stringlib/stringdefs.h
10 Also, you should define the names:
11 FORMAT_STRING
12 FORMAT_LONG
13 FORMAT_FLOAT
14 FORMAT_COMPLEX
15 to be whatever you want the public names of these functions to
16 be. These are the only non-static functions defined here.
19 /* Raises an exception about an unknown presentation type for this
20 * type. */
22 static void
23 unknown_presentation_type(STRINGLIB_CHAR presentation_type,
24 const char* type_name)
26 #if STRINGLIB_IS_UNICODE
27 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
28 hence the two cases. If it is char, gcc complains that the
29 condition below is always true, hence the ifdef. */
30 if (presentation_type > 32 && presentation_type < 128)
31 #endif
32 PyErr_Format(PyExc_ValueError,
33 "Unknown format code '%c' "
34 "for object of type '%.200s'",
35 (char)presentation_type,
36 type_name);
37 #if STRINGLIB_IS_UNICODE
38 else
39 PyErr_Format(PyExc_ValueError,
40 "Unknown format code '\\x%x' "
41 "for object of type '%.200s'",
42 (unsigned int)presentation_type,
43 type_name);
44 #endif
47 static void
48 invalid_comma_type(STRINGLIB_CHAR presentation_type)
50 #if STRINGLIB_IS_UNICODE
51 /* See comment in unknown_presentation_type */
52 if (presentation_type > 32 && presentation_type < 128)
53 #endif
54 PyErr_Format(PyExc_ValueError,
55 "Cannot specify ',' with '%c'.",
56 (char)presentation_type);
57 #if STRINGLIB_IS_UNICODE
58 else
59 PyErr_Format(PyExc_ValueError,
60 "Cannot specify ',' with '\\x%x'.",
61 (unsigned int)presentation_type);
62 #endif
66 get_integer consumes 0 or more decimal digit characters from an
67 input string, updates *result with the corresponding positive
68 integer, and returns the number of digits consumed.
70 returns -1 on error.
72 static int
73 get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
74 Py_ssize_t *result)
76 Py_ssize_t accumulator, digitval, oldaccumulator;
77 int numdigits;
78 accumulator = numdigits = 0;
79 for (;;(*ptr)++, numdigits++) {
80 if (*ptr >= end)
81 break;
82 digitval = STRINGLIB_TODECIMAL(**ptr);
83 if (digitval < 0)
84 break;
86 This trick was copied from old Unicode format code. It's cute,
87 but would really suck on an old machine with a slow divide
88 implementation. Fortunately, in the normal case we do not
89 expect too many digits.
91 oldaccumulator = accumulator;
92 accumulator *= 10;
93 if ((accumulator+10)/10 != oldaccumulator+1) {
94 PyErr_Format(PyExc_ValueError,
95 "Too many decimal digits in format string");
96 return -1;
98 accumulator += digitval;
100 *result = accumulator;
101 return numdigits;
104 /************************************************************************/
105 /*********** standard format specifier parsing **************************/
106 /************************************************************************/
108 /* returns true if this character is a specifier alignment token */
109 Py_LOCAL_INLINE(int)
110 is_alignment_token(STRINGLIB_CHAR c)
112 switch (c) {
113 case '<': case '>': case '=': case '^':
114 return 1;
115 default:
116 return 0;
120 /* returns true if this character is a sign element */
121 Py_LOCAL_INLINE(int)
122 is_sign_element(STRINGLIB_CHAR c)
124 switch (c) {
125 case ' ': case '+': case '-':
126 return 1;
127 default:
128 return 0;
133 typedef struct {
134 STRINGLIB_CHAR fill_char;
135 STRINGLIB_CHAR align;
136 int alternate;
137 STRINGLIB_CHAR sign;
138 Py_ssize_t width;
139 int thousands_separators;
140 Py_ssize_t precision;
141 STRINGLIB_CHAR type;
142 } InternalFormatSpec;
145 ptr points to the start of the format_spec, end points just past its end.
146 fills in format with the parsed information.
147 returns 1 on success, 0 on failure.
148 if failure, sets the exception
150 static int
151 parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
152 Py_ssize_t format_spec_len,
153 InternalFormatSpec *format,
154 char default_type)
156 STRINGLIB_CHAR *ptr = format_spec;
157 STRINGLIB_CHAR *end = format_spec + format_spec_len;
159 /* end-ptr is used throughout this code to specify the length of
160 the input string */
162 Py_ssize_t consumed;
164 format->fill_char = '\0';
165 format->align = '\0';
166 format->alternate = 0;
167 format->sign = '\0';
168 format->width = -1;
169 format->thousands_separators = 0;
170 format->precision = -1;
171 format->type = default_type;
173 /* If the second char is an alignment token,
174 then parse the fill char */
175 if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
176 format->align = ptr[1];
177 format->fill_char = ptr[0];
178 ptr += 2;
180 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
181 format->align = ptr[0];
182 ++ptr;
185 /* Parse the various sign options */
186 if (end-ptr >= 1 && is_sign_element(ptr[0])) {
187 format->sign = ptr[0];
188 ++ptr;
191 /* If the next character is #, we're in alternate mode. This only
192 applies to integers. */
193 if (end-ptr >= 1 && ptr[0] == '#') {
194 format->alternate = 1;
195 ++ptr;
198 /* The special case for 0-padding (backwards compat) */
199 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
200 format->fill_char = '0';
201 if (format->align == '\0') {
202 format->align = '=';
204 ++ptr;
207 consumed = get_integer(&ptr, end, &format->width);
208 if (consumed == -1)
209 /* Overflow error. Exception already set. */
210 return 0;
212 /* If consumed is 0, we didn't consume any characters for the
213 width. In that case, reset the width to -1, because
214 get_integer() will have set it to zero. -1 is how we record
215 that the width wasn't specified. */
216 if (consumed == 0)
217 format->width = -1;
219 /* Comma signifies add thousands separators */
220 if (end-ptr && ptr[0] == ',') {
221 format->thousands_separators = 1;
222 ++ptr;
225 /* Parse field precision */
226 if (end-ptr && ptr[0] == '.') {
227 ++ptr;
229 consumed = get_integer(&ptr, end, &format->precision);
230 if (consumed == -1)
231 /* Overflow error. Exception already set. */
232 return 0;
234 /* Not having a precision after a dot is an error. */
235 if (consumed == 0) {
236 PyErr_Format(PyExc_ValueError,
237 "Format specifier missing precision");
238 return 0;
243 /* Finally, parse the type field. */
245 if (end-ptr > 1) {
246 /* More than one char remain, invalid conversion spec. */
247 PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
248 return 0;
251 if (end-ptr == 1) {
252 format->type = ptr[0];
253 ++ptr;
256 /* Do as much validating as we can, just by looking at the format
257 specifier. Do not take into account what type of formatting
258 we're doing (int, float, string). */
260 if (format->thousands_separators) {
261 switch (format->type) {
262 case 'd':
263 case 'e':
264 case 'f':
265 case 'g':
266 case 'E':
267 case 'G':
268 case '%':
269 case 'F':
270 case '\0':
271 /* These are allowed. See PEP 378.*/
272 break;
273 default:
274 invalid_comma_type(format->type);
275 return 0;
279 return 1;
282 /* Calculate the padding needed. */
283 static void
284 calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
285 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
286 Py_ssize_t *n_total)
288 if (width >= 0) {
289 if (nchars > width)
290 *n_total = nchars;
291 else
292 *n_total = width;
294 else {
295 /* not specified, use all of the chars and no more */
296 *n_total = nchars;
299 /* figure out how much leading space we need, based on the
300 aligning */
301 if (align == '>')
302 *n_lpadding = *n_total - nchars;
303 else if (align == '^')
304 *n_lpadding = (*n_total - nchars) / 2;
305 else
306 *n_lpadding = 0;
308 *n_rpadding = *n_total - nchars - *n_lpadding;
311 /* Do the padding, and return a pointer to where the caller-supplied
312 content goes. */
313 static STRINGLIB_CHAR *
314 fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
315 Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
317 /* Pad on left. */
318 if (n_lpadding)
319 STRINGLIB_FILL(p, fill_char, n_lpadding);
321 /* Pad on right. */
322 if (n_rpadding)
323 STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding);
325 /* Pointer to the user content. */
326 return p + n_lpadding;
329 #if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
330 /************************************************************************/
331 /*********** common routines for numeric formatting *********************/
332 /************************************************************************/
/* Locale type codes: the `type` argument of get_locale_info(). */
#define LT_CURRENT_LOCALE 0     /* take the values from localeconv() */
#define LT_DEFAULT_LOCALE 1     /* hard-coded: '.', ',' and groups of 3 */
#define LT_NO_LOCALE 2          /* '.' as decimal point, no grouping */
/* The locale data needed to format integers, and the part of floats up
   to and including the decimal point.  These are 8-bit strings:
   locales only support chars, not unicode. */
typedef struct {
    char *decimal_point;    /* text written for the decimal point */
    char *thousands_sep;    /* text written between digit groups */
    char *grouping;         /* group sizes, in the format used by
                               struct lconv's `grouping` member */
} LocaleInfo;
348 /* describes the layout for an integer, see the comment in
349 calc_number_widths() for details */
350 typedef struct {
351 Py_ssize_t n_lpadding;
352 Py_ssize_t n_prefix;
353 Py_ssize_t n_spadding;
354 Py_ssize_t n_rpadding;
355 char sign;
356 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
357 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
358 any grouping chars. */
359 Py_ssize_t n_decimal; /* 0 if only an integer */
360 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
361 excluding the decimal itself, if
362 present. */
364 /* These 2 are not the widths of fields, but are needed by
365 STRINGLIB_GROUPING. */
366 Py_ssize_t n_digits; /* The number of digits before a decimal
367 or exponent. */
368 Py_ssize_t n_min_width; /* The min_width we used when we computed
369 the n_grouped_digits width. */
370 } NumberFieldWidths;
373 /* Given a number of the form:
374 digits[remainder]
375 where ptr points to the start and end points to the end, find where
376 the integer part ends. This could be a decimal, an exponent, both,
377 or neither.
378 If a decimal point is present, set *has_decimal and increment
379 remainder beyond it.
380 Results are undefined (but shouldn't crash) for improperly
381 formatted strings.
383 static void
384 parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
385 Py_ssize_t *n_remainder, int *has_decimal)
387 STRINGLIB_CHAR *end = ptr + len;
388 STRINGLIB_CHAR *remainder;
390 while (ptr<end && isdigit(*ptr))
391 ++ptr;
392 remainder = ptr;
394 /* Does remainder start with a decimal point? */
395 *has_decimal = ptr<end && *remainder == '.';
397 /* Skip the decimal point. */
398 if (*has_decimal)
399 remainder++;
401 *n_remainder = end - remainder;
404 /* not all fields of format are used. for example, precision is
405 unused. should this take discrete params in order to be more clear
406 about what it does? or is passing a single format parameter easier
407 and more efficient enough to justify a little obfuscation? */
408 static Py_ssize_t
409 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
410 STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
411 Py_ssize_t n_number, Py_ssize_t n_remainder,
412 int has_decimal, const LocaleInfo *locale,
413 const InternalFormatSpec *format)
415 Py_ssize_t n_non_digit_non_padding;
416 Py_ssize_t n_padding;
418 spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
419 spec->n_lpadding = 0;
420 spec->n_prefix = n_prefix;
421 spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
422 spec->n_remainder = n_remainder;
423 spec->n_spadding = 0;
424 spec->n_rpadding = 0;
425 spec->sign = '\0';
426 spec->n_sign = 0;
428 /* the output will look like:
430 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
433 sign is computed from format->sign and the actual
434 sign of the number
436 prefix is given (it's for the '0x' prefix)
438 digits is already known
440 the total width is either given, or computed from the
441 actual digits
443 only one of lpadding, spadding, and rpadding can be non-zero,
444 and it's calculated from the width and other fields
447 /* compute the various parts we're going to write */
448 switch (format->sign) {
449 case '+':
450 /* always put a + or - */
451 spec->n_sign = 1;
452 spec->sign = (sign_char == '-' ? '-' : '+');
453 break;
454 case ' ':
455 spec->n_sign = 1;
456 spec->sign = (sign_char == '-' ? '-' : ' ');
457 break;
458 default:
459 /* Not specified, or the default (-) */
460 if (sign_char == '-') {
461 spec->n_sign = 1;
462 spec->sign = '-';
466 /* The number of chars used for non-digits and non-padding. */
467 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
468 spec->n_remainder;
470 /* min_width can go negative, that's okay. format->width == -1 means
471 we don't care. */
472 if (format->fill_char == '0')
473 spec->n_min_width = format->width - n_non_digit_non_padding;
474 else
475 spec->n_min_width = 0;
477 if (spec->n_digits == 0)
478 /* This case only occurs when using 'c' formatting, we need
479 to special case it because the grouping code always wants
480 to have at least one character. */
481 spec->n_grouped_digits = 0;
482 else
483 spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
484 spec->n_digits,
485 spec->n_min_width,
486 locale->grouping,
487 locale->thousands_sep);
489 /* Given the desired width and the total of digit and non-digit
490 space we consume, see if we need any padding. format->width can
491 be negative (meaning no padding), but this code still works in
492 that case. */
493 n_padding = format->width -
494 (n_non_digit_non_padding + spec->n_grouped_digits);
495 if (n_padding > 0) {
496 /* Some padding is needed. Determine if it's left, space, or right. */
497 switch (format->align) {
498 case '<':
499 spec->n_rpadding = n_padding;
500 break;
501 case '^':
502 spec->n_lpadding = n_padding / 2;
503 spec->n_rpadding = n_padding - spec->n_lpadding;
504 break;
505 case '=':
506 spec->n_spadding = n_padding;
507 break;
508 default:
509 /* Handles '>', plus catch-all just in case. */
510 spec->n_lpadding = n_padding;
511 break;
514 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
515 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
516 spec->n_remainder + spec->n_rpadding;
519 /* Fill in the digit parts of a numbers's string representation,
520 as determined in calc_number_widths().
521 No error checking, since we know the buffer is the correct size. */
522 static void
523 fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
524 STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
525 STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
526 LocaleInfo *locale, int toupper)
528 /* Used to keep track of digits, decimal, and remainder. */
529 STRINGLIB_CHAR *p = digits;
531 #ifndef NDEBUG
532 Py_ssize_t r;
533 #endif
535 if (spec->n_lpadding) {
536 STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
537 buf += spec->n_lpadding;
539 if (spec->n_sign == 1) {
540 *buf++ = spec->sign;
542 if (spec->n_prefix) {
543 memmove(buf,
544 prefix,
545 spec->n_prefix * sizeof(STRINGLIB_CHAR));
546 if (toupper) {
547 Py_ssize_t t;
548 for (t = 0; t < spec->n_prefix; ++t)
549 buf[t] = STRINGLIB_TOUPPER(buf[t]);
551 buf += spec->n_prefix;
553 if (spec->n_spadding) {
554 STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
555 buf += spec->n_spadding;
558 /* Only for type 'c' special case, it has no digits. */
559 if (spec->n_digits != 0) {
560 /* Fill the digits with InsertThousandsGrouping. */
561 #ifndef NDEBUG
563 #endif
564 STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
565 spec->n_digits, spec->n_min_width,
566 locale->grouping, locale->thousands_sep);
567 #ifndef NDEBUG
568 assert(r == spec->n_grouped_digits);
569 #endif
570 p += spec->n_digits;
572 if (toupper) {
573 Py_ssize_t t;
574 for (t = 0; t < spec->n_grouped_digits; ++t)
575 buf[t] = STRINGLIB_TOUPPER(buf[t]);
577 buf += spec->n_grouped_digits;
579 if (spec->n_decimal) {
580 Py_ssize_t t;
581 for (t = 0; t < spec->n_decimal; ++t)
582 buf[t] = locale->decimal_point[t];
583 buf += spec->n_decimal;
584 p += 1;
587 if (spec->n_remainder) {
588 memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
589 buf += spec->n_remainder;
590 p += spec->n_remainder;
593 if (spec->n_rpadding) {
594 STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
595 buf += spec->n_rpadding;
599 static char no_grouping[1] = {CHAR_MAX};
601 /* Find the decimal point character(s?), thousands_separator(s?), and
602 grouping description, either for the current locale if type is
603 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
604 none if LT_NO_LOCALE. */
605 static void
606 get_locale_info(int type, LocaleInfo *locale_info)
608 switch (type) {
609 case LT_CURRENT_LOCALE: {
610 struct lconv *locale_data = localeconv();
611 locale_info->decimal_point = locale_data->decimal_point;
612 locale_info->thousands_sep = locale_data->thousands_sep;
613 locale_info->grouping = locale_data->grouping;
614 break;
616 case LT_DEFAULT_LOCALE:
617 locale_info->decimal_point = ".";
618 locale_info->thousands_sep = ",";
619 locale_info->grouping = "\3"; /* Group every 3 characters,
620 trailing 0 means repeat
621 infinitely. */
622 break;
623 case LT_NO_LOCALE:
624 locale_info->decimal_point = ".";
625 locale_info->thousands_sep = "";
626 locale_info->grouping = no_grouping;
627 break;
628 default:
629 assert(0);
633 #endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
635 /************************************************************************/
636 /*********** string formatting ******************************************/
637 /************************************************************************/
639 static PyObject *
640 format_string_internal(PyObject *value, const InternalFormatSpec *format)
642 Py_ssize_t lpad;
643 Py_ssize_t rpad;
644 Py_ssize_t total;
645 STRINGLIB_CHAR *p;
646 Py_ssize_t len = STRINGLIB_LEN(value);
647 PyObject *result = NULL;
649 /* sign is not allowed on strings */
650 if (format->sign != '\0') {
651 PyErr_SetString(PyExc_ValueError,
652 "Sign not allowed in string format specifier");
653 goto done;
656 /* alternate is not allowed on strings */
657 if (format->alternate) {
658 PyErr_SetString(PyExc_ValueError,
659 "Alternate form (#) not allowed in string format "
660 "specifier");
661 goto done;
664 /* '=' alignment not allowed on strings */
665 if (format->align == '=') {
666 PyErr_SetString(PyExc_ValueError,
667 "'=' alignment not allowed "
668 "in string format specifier");
669 goto done;
672 /* if precision is specified, output no more that format.precision
673 characters */
674 if (format->precision >= 0 && len >= format->precision) {
675 len = format->precision;
678 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
680 /* allocate the resulting string */
681 result = STRINGLIB_NEW(NULL, total);
682 if (result == NULL)
683 goto done;
685 /* Write into that space. First the padding. */
686 p = fill_padding(STRINGLIB_STR(result), len,
687 format->fill_char=='\0'?' ':format->fill_char,
688 lpad, rpad);
690 /* Then the source string. */
691 memcpy(p, STRINGLIB_STR(value), len * sizeof(STRINGLIB_CHAR));
693 done:
694 return result;
698 /************************************************************************/
699 /*********** long formatting ********************************************/
700 /************************************************************************/
702 #if defined FORMAT_LONG || defined FORMAT_INT
703 typedef PyObject*
704 (*IntOrLongToString)(PyObject *value, int base);
706 static PyObject *
707 format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
708 IntOrLongToString tostring)
710 PyObject *result = NULL;
711 PyObject *tmp = NULL;
712 STRINGLIB_CHAR *pnumeric_chars;
713 STRINGLIB_CHAR numeric_char;
714 STRINGLIB_CHAR sign_char = '\0';
715 Py_ssize_t n_digits; /* count of digits need from the computed
716 string */
717 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
718 produces non-digits */
719 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
720 Py_ssize_t n_total;
721 STRINGLIB_CHAR *prefix = NULL;
722 NumberFieldWidths spec;
723 long x;
725 /* Locale settings, either from the actual locale or
726 from a hard-code pseudo-locale */
727 LocaleInfo locale;
729 /* no precision allowed on integers */
730 if (format->precision != -1) {
731 PyErr_SetString(PyExc_ValueError,
732 "Precision not allowed in integer format specifier");
733 goto done;
736 /* special case for character formatting */
737 if (format->type == 'c') {
738 /* error to specify a sign */
739 if (format->sign != '\0') {
740 PyErr_SetString(PyExc_ValueError,
741 "Sign not allowed with integer"
742 " format specifier 'c'");
743 goto done;
746 /* Error to specify a comma. */
747 if (format->thousands_separators) {
748 PyErr_SetString(PyExc_ValueError,
749 "Thousands separators not allowed with integer"
750 " format specifier 'c'");
751 goto done;
754 /* taken from unicodeobject.c formatchar() */
755 /* Integer input truncated to a character */
756 /* XXX: won't work for int */
757 x = PyLong_AsLong(value);
758 if (x == -1 && PyErr_Occurred())
759 goto done;
760 #ifdef Py_UNICODE_WIDE
761 if (x < 0 || x > 0x10ffff) {
762 PyErr_SetString(PyExc_OverflowError,
763 "%c arg not in range(0x110000) "
764 "(wide Python build)");
765 goto done;
767 #else
768 if (x < 0 || x > 0xffff) {
769 PyErr_SetString(PyExc_OverflowError,
770 "%c arg not in range(0x10000) "
771 "(narrow Python build)");
772 goto done;
774 #endif
775 numeric_char = (STRINGLIB_CHAR)x;
776 pnumeric_chars = &numeric_char;
777 n_digits = 1;
779 /* As a sort-of hack, we tell calc_number_widths that we only
780 have "remainder" characters. calc_number_widths thinks
781 these are characters that don't get formatted, only copied
782 into the output string. We do this for 'c' formatting,
783 because the characters are likely to be non-digits. */
784 n_remainder = 1;
786 else {
787 int base;
788 int leading_chars_to_skip = 0; /* Number of characters added by
789 PyNumber_ToBase that we want to
790 skip over. */
792 /* Compute the base and how many characters will be added by
793 PyNumber_ToBase */
794 switch (format->type) {
795 case 'b':
796 base = 2;
797 leading_chars_to_skip = 2; /* 0b */
798 break;
799 case 'o':
800 base = 8;
801 leading_chars_to_skip = 2; /* 0o */
802 break;
803 case 'x':
804 case 'X':
805 base = 16;
806 leading_chars_to_skip = 2; /* 0x */
807 break;
808 default: /* shouldn't be needed, but stops a compiler warning */
809 case 'd':
810 case 'n':
811 base = 10;
812 break;
815 /* The number of prefix chars is the same as the leading
816 chars to skip */
817 if (format->alternate)
818 n_prefix = leading_chars_to_skip;
820 /* Do the hard part, converting to a string in a given base */
821 tmp = tostring(value, base);
822 if (tmp == NULL)
823 goto done;
825 pnumeric_chars = STRINGLIB_STR(tmp);
826 n_digits = STRINGLIB_LEN(tmp);
828 prefix = pnumeric_chars;
830 /* Remember not to modify what pnumeric_chars points to. it
831 might be interned. Only modify it after we copy it into a
832 newly allocated output buffer. */
834 /* Is a sign character present in the output? If so, remember it
835 and skip it */
836 if (pnumeric_chars[0] == '-') {
837 sign_char = pnumeric_chars[0];
838 ++prefix;
839 ++leading_chars_to_skip;
842 /* Skip over the leading chars (0x, 0b, etc.) */
843 n_digits -= leading_chars_to_skip;
844 pnumeric_chars += leading_chars_to_skip;
847 /* Determine the grouping, separator, and decimal point, if any. */
848 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
849 (format->thousands_separators ?
850 LT_DEFAULT_LOCALE :
851 LT_NO_LOCALE),
852 &locale);
854 /* Calculate how much memory we'll need. */
855 n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
856 n_digits, n_remainder, 0, &locale, format);
858 /* Allocate the memory. */
859 result = STRINGLIB_NEW(NULL, n_total);
860 if (!result)
861 goto done;
863 /* Populate the memory. */
864 fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
865 prefix, format->fill_char == '\0' ? ' ' : format->fill_char,
866 &locale, format->type == 'X');
868 done:
869 Py_XDECREF(tmp);
870 return result;
872 #endif /* defined FORMAT_LONG || defined FORMAT_INT */
874 /************************************************************************/
875 /*********** float formatting *******************************************/
876 /************************************************************************/
878 #ifdef FORMAT_FLOAT
879 #if STRINGLIB_IS_UNICODE
880 static void
881 strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
883 Py_ssize_t i;
884 for (i = 0; i < len; ++i)
885 buffer[i] = (Py_UNICODE)charbuffer[i];
887 #endif
889 /* much of this is taken from unicodeobject.c */
890 static PyObject *
891 format_float_internal(PyObject *value,
892 const InternalFormatSpec *format)
894 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
895 Py_ssize_t n_digits;
896 Py_ssize_t n_remainder;
897 Py_ssize_t n_total;
898 int has_decimal;
899 double val;
900 Py_ssize_t precision = format->precision;
901 Py_ssize_t default_precision = 6;
902 STRINGLIB_CHAR type = format->type;
903 int add_pct = 0;
904 STRINGLIB_CHAR *p;
905 NumberFieldWidths spec;
906 int flags = 0;
907 PyObject *result = NULL;
908 STRINGLIB_CHAR sign_char = '\0';
909 int float_type; /* Used to see if we have a nan, inf, or regular float. */
911 #if STRINGLIB_IS_UNICODE
912 Py_UNICODE *unicode_tmp = NULL;
913 #endif
915 /* Locale settings, either from the actual locale or
916 from a hard-code pseudo-locale */
917 LocaleInfo locale;
919 /* Alternate is not allowed on floats. */
920 if (format->alternate) {
921 PyErr_SetString(PyExc_ValueError,
922 "Alternate form (#) not allowed in float format "
923 "specifier");
924 goto done;
927 if (type == '\0') {
928 /* Omitted type specifier. This is like 'g' but with at least one
929 digit after the decimal point, and different default precision.*/
930 type = 'g';
931 default_precision = PyFloat_STR_PRECISION;
932 flags |= Py_DTSF_ADD_DOT_0;
935 if (type == 'n')
936 /* 'n' is the same as 'g', except for the locale used to
937 format the result. We take care of that later. */
938 type = 'g';
940 #if PY_VERSION_HEX < 0x0301000
941 /* 'F' is the same as 'f', per the PEP */
942 /* This is no longer the case in 3.x */
943 if (type == 'F')
944 type = 'f';
945 #endif
947 val = PyFloat_AsDouble(value);
948 if (val == -1.0 && PyErr_Occurred())
949 goto done;
951 if (type == '%') {
952 type = 'f';
953 val *= 100;
954 add_pct = 1;
957 if (precision < 0)
958 precision = default_precision;
960 /* Cast "type", because if we're in unicode we need to pass a
961 8-bit char. This is safe, because we've restricted what "type"
962 can be. */
963 buf = PyOS_double_to_string(val, (char)type, precision, flags,
964 &float_type);
965 if (buf == NULL)
966 goto done;
967 n_digits = strlen(buf);
969 if (add_pct) {
970 /* We know that buf has a trailing zero (since we just called
971 strlen() on it), and we don't use that fact any more. So we
972 can just write over the trailing zero. */
973 buf[n_digits] = '%';
974 n_digits += 1;
977 /* Since there is no unicode version of PyOS_double_to_string,
978 just use the 8 bit version and then convert to unicode. */
979 #if STRINGLIB_IS_UNICODE
980 unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
981 if (unicode_tmp == NULL) {
982 PyErr_NoMemory();
983 goto done;
985 strtounicode(unicode_tmp, buf, n_digits);
986 p = unicode_tmp;
987 #else
988 p = buf;
989 #endif
991 /* Is a sign character present in the output? If so, remember it
992 and skip it */
993 if (*p == '-') {
994 sign_char = *p;
995 ++p;
996 --n_digits;
999 /* Determine if we have any "remainder" (after the digits, might include
1000 decimal or exponent or both (or neither)) */
1001 parse_number(p, n_digits, &n_remainder, &has_decimal);
1003 /* Determine the grouping, separator, and decimal point, if any. */
1004 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1005 (format->thousands_separators ?
1006 LT_DEFAULT_LOCALE :
1007 LT_NO_LOCALE),
1008 &locale);
1010 /* Calculate how much memory we'll need. */
1011 n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
1012 n_remainder, has_decimal, &locale, format);
1014 /* Allocate the memory. */
1015 result = STRINGLIB_NEW(NULL, n_total);
1016 if (result == NULL)
1017 goto done;
1019 /* Populate the memory. */
1020 fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
1021 format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
1024 done:
1025 PyMem_Free(buf);
1026 #if STRINGLIB_IS_UNICODE
1027 PyMem_Free(unicode_tmp);
1028 #endif
1029 return result;
1031 #endif /* FORMAT_FLOAT */
1033 /************************************************************************/
1034 /*********** complex formatting *****************************************/
1035 /************************************************************************/
1037 #ifdef FORMAT_COMPLEX
1039 static PyObject *
1040 format_complex_internal(PyObject *value,
1041 const InternalFormatSpec *format)
1043 double re;
1044 double im;
1045 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1046 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1048 InternalFormatSpec tmp_format = *format;
1049 Py_ssize_t n_re_digits;
1050 Py_ssize_t n_im_digits;
1051 Py_ssize_t n_re_remainder;
1052 Py_ssize_t n_im_remainder;
1053 Py_ssize_t n_re_total;
1054 Py_ssize_t n_im_total;
1055 int re_has_decimal;
1056 int im_has_decimal;
1057 Py_ssize_t precision = format->precision;
1058 Py_ssize_t default_precision = 6;
1059 STRINGLIB_CHAR type = format->type;
1060 STRINGLIB_CHAR *p_re;
1061 STRINGLIB_CHAR *p_im;
1062 NumberFieldWidths re_spec;
1063 NumberFieldWidths im_spec;
1064 int flags = 0;
1065 PyObject *result = NULL;
1066 STRINGLIB_CHAR *p;
1067 STRINGLIB_CHAR re_sign_char = '\0';
1068 STRINGLIB_CHAR im_sign_char = '\0';
1069 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1070 int im_float_type;
1071 int add_parens = 0;
1072 int skip_re = 0;
1073 Py_ssize_t lpad;
1074 Py_ssize_t rpad;
1075 Py_ssize_t total;
1077 #if STRINGLIB_IS_UNICODE
1078 Py_UNICODE *re_unicode_tmp = NULL;
1079 Py_UNICODE *im_unicode_tmp = NULL;
1080 #endif
1082 /* Locale settings, either from the actual locale or
1083 from a hard-code pseudo-locale */
1084 LocaleInfo locale;
1086 /* Alternate is not allowed on complex. */
1087 if (format->alternate) {
1088 PyErr_SetString(PyExc_ValueError,
1089 "Alternate form (#) not allowed in complex format "
1090 "specifier");
1091 goto done;
1094 /* Neither is zero pading. */
1095 if (format->fill_char == '0') {
1096 PyErr_SetString(PyExc_ValueError,
1097 "Zero padding is not allowed in complex format "
1098 "specifier");
1099 goto done;
1102 /* Neither is '=' alignment . */
1103 if (format->align == '=') {
1104 PyErr_SetString(PyExc_ValueError,
1105 "'=' alignment flag is not allowed in complex format "
1106 "specifier");
1107 goto done;
1110 re = PyComplex_RealAsDouble(value);
1111 if (re == -1.0 && PyErr_Occurred())
1112 goto done;
1113 im = PyComplex_ImagAsDouble(value);
1114 if (im == -1.0 && PyErr_Occurred())
1115 goto done;
1117 if (type == '\0') {
1118 /* Omitted type specifier. Should be like str(self). */
1119 type = 'g';
1120 default_precision = PyFloat_STR_PRECISION;
1121 add_parens = 1;
1122 if (re == 0.0)
1123 skip_re = 1;
1126 if (type == 'n')
1127 /* 'n' is the same as 'g', except for the locale used to
1128 format the result. We take care of that later. */
1129 type = 'g';
1131 #if PY_VERSION_HEX < 0x03010000
1132 /* This is no longer the case in 3.x */
1133 /* 'F' is the same as 'f', per the PEP */
1134 if (type == 'F')
1135 type = 'f';
1136 #endif
1138 if (precision < 0)
1139 precision = default_precision;
1141 /* Cast "type", because if we're in unicode we need to pass a
1142 8-bit char. This is safe, because we've restricted what "type"
1143 can be. */
1144 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1145 &re_float_type);
1146 if (re_buf == NULL)
1147 goto done;
1148 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1149 &im_float_type);
1150 if (im_buf == NULL)
1151 goto done;
1153 n_re_digits = strlen(re_buf);
1154 n_im_digits = strlen(im_buf);
1156 /* Since there is no unicode version of PyOS_double_to_string,
1157 just use the 8 bit version and then convert to unicode. */
1158 #if STRINGLIB_IS_UNICODE
1159 re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
1160 if (re_unicode_tmp == NULL) {
1161 PyErr_NoMemory();
1162 goto done;
1164 strtounicode(re_unicode_tmp, re_buf, n_re_digits);
1165 p_re = re_unicode_tmp;
1167 im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
1168 if (im_unicode_tmp == NULL) {
1169 PyErr_NoMemory();
1170 goto done;
1172 strtounicode(im_unicode_tmp, im_buf, n_im_digits);
1173 p_im = im_unicode_tmp;
1174 #else
1175 p_re = re_buf;
1176 p_im = im_buf;
1177 #endif
1179 /* Is a sign character present in the output? If so, remember it
1180 and skip it */
1181 if (*p_re == '-') {
1182 re_sign_char = *p_re;
1183 ++p_re;
1184 --n_re_digits;
1186 if (*p_im == '-') {
1187 im_sign_char = *p_im;
1188 ++p_im;
1189 --n_im_digits;
1192 /* Determine if we have any "remainder" (after the digits, might include
1193 decimal or exponent or both (or neither)) */
1194 parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
1195 parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
1197 /* Determine the grouping, separator, and decimal point, if any. */
1198 get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1199 (format->thousands_separators ?
1200 LT_DEFAULT_LOCALE :
1201 LT_NO_LOCALE),
1202 &locale);
1204 /* Turn off any padding. We'll do it later after we've composed
1205 the numbers without padding. */
1206 tmp_format.fill_char = '\0';
1207 tmp_format.align = '\0';
1208 tmp_format.width = -1;
1210 /* Calculate how much memory we'll need. */
1211 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
1212 n_re_digits, n_re_remainder,
1213 re_has_decimal, &locale, &tmp_format);
1215 /* Same formatting, but always include a sign. */
1216 tmp_format.sign = '+';
1217 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
1218 n_im_digits, n_im_remainder,
1219 im_has_decimal, &locale, &tmp_format);
1221 if (skip_re)
1222 n_re_total = 0;
1224 /* Add 1 for the 'j', and optionally 2 for parens. */
1225 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1226 format->width, format->align, &lpad, &rpad, &total);
1228 result = STRINGLIB_NEW(NULL, total);
1229 if (result == NULL)
1230 goto done;
1232 /* Populate the memory. First, the padding. */
1233 p = fill_padding(STRINGLIB_STR(result),
1234 n_re_total + n_im_total + 1 + add_parens * 2,
1235 format->fill_char=='\0' ? ' ' : format->fill_char,
1236 lpad, rpad);
1238 if (add_parens)
1239 *p++ = '(';
1241 if (!skip_re) {
1242 fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
1243 p += n_re_total;
1245 fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
1246 p += n_im_total;
1247 *p++ = 'j';
1249 if (add_parens)
1250 *p++ = ')';
1252 done:
1253 PyMem_Free(re_buf);
1254 PyMem_Free(im_buf);
1255 #if STRINGLIB_IS_UNICODE
1256 PyMem_Free(re_unicode_tmp);
1257 PyMem_Free(im_unicode_tmp);
1258 #endif
1259 return result;
1261 #endif /* FORMAT_COMPLEX */
1263 /************************************************************************/
1264 /*********** built in formatters ****************************************/
1265 /************************************************************************/
1266 PyObject *
1267 FORMAT_STRING(PyObject *obj,
1268 STRINGLIB_CHAR *format_spec,
1269 Py_ssize_t format_spec_len)
1271 InternalFormatSpec format;
1272 PyObject *result = NULL;
1274 /* check for the special case of zero length format spec, make
1275 it equivalent to str(obj) */
1276 if (format_spec_len == 0) {
1277 result = STRINGLIB_TOSTR(obj);
1278 goto done;
1281 /* parse the format_spec */
1282 if (!parse_internal_render_format_spec(format_spec, format_spec_len,
1283 &format, 's'))
1284 goto done;
1286 /* type conversion? */
1287 switch (format.type) {
1288 case 's':
1289 /* no type conversion needed, already a string. do the formatting */
1290 result = format_string_internal(obj, &format);
1291 break;
1292 default:
1293 /* unknown */
1294 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1295 goto done;
1298 done:
1299 return result;
1302 #if defined FORMAT_LONG || defined FORMAT_INT
1303 static PyObject*
1304 format_int_or_long(PyObject* obj,
1305 STRINGLIB_CHAR *format_spec,
1306 Py_ssize_t format_spec_len,
1307 IntOrLongToString tostring)
1309 PyObject *result = NULL;
1310 PyObject *tmp = NULL;
1311 InternalFormatSpec format;
1313 /* check for the special case of zero length format spec, make
1314 it equivalent to str(obj) */
1315 if (format_spec_len == 0) {
1316 result = STRINGLIB_TOSTR(obj);
1317 goto done;
1320 /* parse the format_spec */
1321 if (!parse_internal_render_format_spec(format_spec,
1322 format_spec_len,
1323 &format, 'd'))
1324 goto done;
1326 /* type conversion? */
1327 switch (format.type) {
1328 case 'b':
1329 case 'c':
1330 case 'd':
1331 case 'o':
1332 case 'x':
1333 case 'X':
1334 case 'n':
1335 /* no type conversion needed, already an int (or long). do
1336 the formatting */
1337 result = format_int_or_long_internal(obj, &format, tostring);
1338 break;
1340 case 'e':
1341 case 'E':
1342 case 'f':
1343 case 'F':
1344 case 'g':
1345 case 'G':
1346 case '%':
1347 /* convert to float */
1348 tmp = PyNumber_Float(obj);
1349 if (tmp == NULL)
1350 goto done;
1351 result = format_float_internal(tmp, &format);
1352 break;
1354 default:
1355 /* unknown */
1356 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1357 goto done;
1360 done:
1361 Py_XDECREF(tmp);
1362 return result;
1364 #endif /* FORMAT_LONG || defined FORMAT_INT */
1366 #ifdef FORMAT_LONG
1367 /* Need to define long_format as a function that will convert a long
1368 to a string. In 3.0, _PyLong_Format has the correct signature. In
1369 2.x, we need to fudge a few parameters */
1370 #if PY_VERSION_HEX >= 0x03000000
1371 #define long_format _PyLong_Format
1372 #else
1373 static PyObject*
1374 long_format(PyObject* value, int base)
1376 /* Convert to base, don't add trailing 'L', and use the new octal
1377 format. We already know this is a long object */
1378 assert(PyLong_Check(value));
1379 /* convert to base, don't add 'L', and use the new octal format */
1380 return _PyLong_Format(value, base, 0, 1);
1382 #endif
1384 PyObject *
1385 FORMAT_LONG(PyObject *obj,
1386 STRINGLIB_CHAR *format_spec,
1387 Py_ssize_t format_spec_len)
1389 return format_int_or_long(obj, format_spec, format_spec_len,
1390 long_format);
1392 #endif /* FORMAT_LONG */
1394 #ifdef FORMAT_INT
1395 /* this is only used for 2.x, not 3.0 */
1396 static PyObject*
1397 int_format(PyObject* value, int base)
1399 /* Convert to base, and use the new octal format. We already
1400 know this is an int object */
1401 assert(PyInt_Check(value));
1402 return _PyInt_Format((PyIntObject*)value, base, 1);
1405 PyObject *
1406 FORMAT_INT(PyObject *obj,
1407 STRINGLIB_CHAR *format_spec,
1408 Py_ssize_t format_spec_len)
1410 return format_int_or_long(obj, format_spec, format_spec_len,
1411 int_format);
1413 #endif /* FORMAT_INT */
1415 #ifdef FORMAT_FLOAT
1416 PyObject *
1417 FORMAT_FLOAT(PyObject *obj,
1418 STRINGLIB_CHAR *format_spec,
1419 Py_ssize_t format_spec_len)
1421 PyObject *result = NULL;
1422 InternalFormatSpec format;
1424 /* check for the special case of zero length format spec, make
1425 it equivalent to str(obj) */
1426 if (format_spec_len == 0) {
1427 result = STRINGLIB_TOSTR(obj);
1428 goto done;
1431 /* parse the format_spec */
1432 if (!parse_internal_render_format_spec(format_spec,
1433 format_spec_len,
1434 &format, '\0'))
1435 goto done;
1437 /* type conversion? */
1438 switch (format.type) {
1439 case '\0': /* No format code: like 'g', but with at least one decimal. */
1440 case 'e':
1441 case 'E':
1442 case 'f':
1443 case 'F':
1444 case 'g':
1445 case 'G':
1446 case 'n':
1447 case '%':
1448 /* no conversion, already a float. do the formatting */
1449 result = format_float_internal(obj, &format);
1450 break;
1452 default:
1453 /* unknown */
1454 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1455 goto done;
1458 done:
1459 return result;
1461 #endif /* FORMAT_FLOAT */
1463 #ifdef FORMAT_COMPLEX
1464 PyObject *
1465 FORMAT_COMPLEX(PyObject *obj,
1466 STRINGLIB_CHAR *format_spec,
1467 Py_ssize_t format_spec_len)
1469 PyObject *result = NULL;
1470 InternalFormatSpec format;
1472 /* check for the special case of zero length format spec, make
1473 it equivalent to str(obj) */
1474 if (format_spec_len == 0) {
1475 result = STRINGLIB_TOSTR(obj);
1476 goto done;
1479 /* parse the format_spec */
1480 if (!parse_internal_render_format_spec(format_spec,
1481 format_spec_len,
1482 &format, '\0'))
1483 goto done;
1485 /* type conversion? */
1486 switch (format.type) {
1487 case '\0': /* No format code: like 'g', but with at least one decimal. */
1488 case 'e':
1489 case 'E':
1490 case 'f':
1491 case 'F':
1492 case 'g':
1493 case 'G':
1494 case 'n':
1495 /* no conversion, already a complex. do the formatting */
1496 result = format_complex_internal(obj, &format);
1497 break;
1499 default:
1500 /* unknown */
1501 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1502 goto done;
1505 done:
1506 return result;
1508 #endif /* FORMAT_COMPLEX */