Issue #7117 (backport py3k float repr) continued:
[python.git] / Python / pystrtod.c
blob2f34b9b6c7fd1fe65ce31270635b7a0b172cae24
1 /* -*- Mode: C; c-file-style: "python" -*- */
3 #include <Python.h>
4 #include <locale.h>
/* Case-insensitive prefix match used for nan and inf detection.

   s is the input being examined; t is the pattern and must already be
   lower-case.  Returns 1 if s starts with t (ignoring the case of s),
   0 otherwise.  s is only read up to the first mismatch, so it may be
   shorter than t. */

static int
case_insensitive_match(const char *s, const char *t)
{
    while (*t && Py_TOLOWER(*s) == *t) {
        s++;
        t++;
    }
    /* We matched iff the whole pattern was consumed. */
    return *t ? 0 : 1;
}
19 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20 "infinity", with an optional leading sign of "+" or "-". On success,
21 return the NaN or Infinity as a double and set *endptr to point just beyond
22 the successfully parsed portion of the string. On failure, return -1.0 and
23 set *endptr to point to the start of the string. */
25 double
26 _Py_parse_inf_or_nan(const char *p, char **endptr)
28 double retval;
29 const char *s;
30 int negate = 0;
32 s = p;
33 if (*s == '-') {
34 negate = 1;
35 s++;
37 else if (*s == '+') {
38 s++;
40 if (case_insensitive_match(s, "inf")) {
41 s += 3;
42 if (case_insensitive_match(s, "inity"))
43 s += 5;
44 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
46 #ifdef Py_NAN
47 else if (case_insensitive_match(s, "nan")) {
48 s += 3;
49 retval = negate ? -Py_NAN : Py_NAN;
51 #endif
52 else {
53 s = p;
54 retval = -1.0;
56 *endptr = (char *)s;
57 return retval;
60 /**
61 * PyOS_ascii_strtod:
62 * @nptr: the string to convert to a numeric value.
63 * @endptr: if non-%NULL, it returns the character after
64 * the last character used in the conversion.
66 * Converts a string to a #gdouble value.
67 * This function behaves like the standard strtod() function
68 * does in the C locale. It does this without actually
69 * changing the current locale, since that would not be
70 * thread-safe.
72 * This function is typically used when reading configuration
73 * files or other non-user input that should be locale independent.
74 * To handle input from the user you should normally use the
75 * locale-sensitive system strtod() function.
77 * If the correct value would cause overflow, plus or minus %HUGE_VAL
78 * is returned (according to the sign of the value), and %ERANGE is
79 * stored in %errno. If the correct value would cause underflow,
80 * zero is returned and %ERANGE is stored in %errno.
81 * If memory allocation fails, %ENOMEM is stored in %errno.
83 * This function resets %errno before calling strtod() so that
84 * you can reliably detect overflow and underflow.
86 * Return value: the #gdouble value.
87 **/
89 #ifndef PY_NO_SHORT_FLOAT_REPR
91 double
92 _PyOS_ascii_strtod(const char *nptr, char **endptr)
94 double result;
95 _Py_SET_53BIT_PRECISION_HEADER;
97 assert(nptr != NULL);
98 /* Set errno to zero, so that we can distinguish zero results
99 and underflows */
100 errno = 0;
102 _Py_SET_53BIT_PRECISION_START;
103 result = _Py_dg_strtod(nptr, endptr);
104 _Py_SET_53BIT_PRECISION_END;
106 if (*endptr == nptr)
107 /* string might represent and inf or nan */
108 result = _Py_parse_inf_or_nan(nptr, endptr);
110 return result;
114 #else
/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr must be non-NULL; endptr must be non-NULL.  On success *endptr points
   just past the parsed number.  On invalid input *endptr is set to nptr,
   errno to EINVAL and -1.0 is returned.  If the temporary copy cannot be
   allocated, *endptr is set to nptr and errno to ENOMEM.
*/

double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position in 'copy' back into the
               caller's string.  fail_pos points into 'copy' while
               decimal_point_pos points into the input, so the two must
               be compared as offsets, never as raw pointers.  If strtod
               stopped after the (possibly multi-byte) locale decimal
               point, compensate for its extra length. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
271 #endif
/* Like _PyOS_ascii_strtod, but skips leading whitespace first.

   endptr may be NULL.  When it is non-NULL it receives the end of the parsed
   number, or nptr itself (not the position after the whitespace) if no
   number could be parsed. */
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    const char *p;
    double x;

    /* _PyOS_ascii_strtod already does everything that we want,
       except that it doesn't parse leading whitespace */
    p = nptr;
    while (Py_ISSPACE(*p))
        p++;
    x = _PyOS_ascii_strtod(p, &fail_pos);
    if (fail_pos == p)
        fail_pos = (char *)nptr;
    if (endptr)
        *endptr = (char *)fail_pos;
    return x;
}
293 double
294 PyOS_ascii_atof(const char *nptr)
296 return PyOS_ascii_strtod(nptr, NULL);
299 /* PyOS_string_to_double is the recommended replacement for the
300 PyOS_ascii_strtod and PyOS_ascii_atof functions. It converts a
301 null-terminated byte string s (interpreted as a string of ASCII characters)
302 to a float. The string should not have leading or trailing whitespace (in
303 contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
304 whitespace). The conversion is independent of the current locale.
306 If endptr is NULL, try to convert the whole string. Raise ValueError and
307 return -1.0 if the string is not a valid representation of a floating-point
308 number.
310 If endptr is non-NULL, try to convert as much of the string as possible.
311 If no initial segment of the string is the valid representation of a
312 floating-point number then *endptr is set to point to the beginning of the
313 string, -1.0 is returned and again ValueError is raised.
315 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
316 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
317 exception is raised. Otherwise, overflow_exception should point to a
318 a Python exception, this exception will be raised, -1.0 will be returned,
319 and *endptr will point just past the end of the converted value.
321 If any other failure occurs (for example lack of memory), -1.0 is returned
322 and the appropriate Python exception will have been set.
325 double
326 PyOS_string_to_double(const char *s,
327 char **endptr,
328 PyObject *overflow_exception)
330 double x, result=-1.0;
331 char *fail_pos;
333 errno = 0;
334 PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
335 x = PyOS_ascii_strtod(s, &fail_pos);
336 PyFPE_END_PROTECT(x)
338 if (errno == ENOMEM) {
339 PyErr_NoMemory();
340 fail_pos = (char *)s;
342 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
343 PyErr_Format(PyExc_ValueError,
344 "could not convert string to float: "
345 "%.200s", s);
346 else if (fail_pos == s)
347 PyErr_Format(PyExc_ValueError,
348 "could not convert string to float: "
349 "%.200s", s);
350 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
351 PyErr_Format(overflow_exception,
352 "value too large to convert to float: "
353 "%.200s", s);
354 else
355 result = x;
357 if (endptr != NULL)
358 *endptr = fail_pos;
359 return result;
362 /* Given a string that may have a decimal point in the current
363 locale, change it back to a dot. Since the string cannot get
364 longer, no need for a maximum buffer size parameter. */
365 Py_LOCAL_INLINE(void)
366 change_decimal_from_locale_to_dot(char* buffer)
368 struct lconv *locale_data = localeconv();
369 const char *decimal_point = locale_data->decimal_point;
371 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
372 size_t decimal_point_len = strlen(decimal_point);
374 if (*buffer == '+' || *buffer == '-')
375 buffer++;
376 while (Py_ISDIGIT(*buffer))
377 buffer++;
378 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
379 *buffer = '.';
380 buffer++;
381 if (decimal_point_len > 1) {
382 /* buffer needs to get smaller */
383 size_t rest_len = strlen(buffer +
384 (decimal_point_len - 1));
385 memmove(buffer,
386 buffer + (decimal_point_len - 1),
387 rest_len);
388 buffer[rest_len] = 0;
395 Py_LOCAL_INLINE(void)
396 ensure_sign(char* buffer, size_t buf_size)
398 size_t len;
400 if (buffer[0] == '-')
401 /* Already have a sign. */
402 return;
404 /* Include the trailing 0 byte. */
405 len = strlen(buffer)+1;
406 if (len >= buf_size+1)
407 /* No room for the sign, don't do anything. */
408 return;
410 memmove(buffer+1, buffer, len);
411 buffer[0] = '+';
414 /* From the C99 standard, section 7.19.6:
415 The exponent always contains at least two digits, and only as many more digits
416 as necessary to represent the exponent.
418 #define MIN_EXPONENT_DIGITS 2
420 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
421 in length. */
422 Py_LOCAL_INLINE(void)
423 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
425 char *p = strpbrk(buffer, "eE");
426 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
427 char *start = p + 2;
428 int exponent_digit_cnt = 0;
429 int leading_zero_cnt = 0;
430 int in_leading_zeros = 1;
431 int significant_digit_cnt;
433 /* Skip over the exponent and the sign. */
434 p += 2;
436 /* Find the end of the exponent, keeping track of leading
437 zeros. */
438 while (*p && Py_ISDIGIT(*p)) {
439 if (in_leading_zeros && *p == '0')
440 ++leading_zero_cnt;
441 if (*p != '0')
442 in_leading_zeros = 0;
443 ++p;
444 ++exponent_digit_cnt;
447 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
448 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
449 /* If there are 2 exactly digits, we're done,
450 regardless of what they contain */
452 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
453 int extra_zeros_cnt;
455 /* There are more than 2 digits in the exponent. See
456 if we can delete some of the leading zeros */
457 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
458 significant_digit_cnt = MIN_EXPONENT_DIGITS;
459 extra_zeros_cnt = exponent_digit_cnt -
460 significant_digit_cnt;
462 /* Delete extra_zeros_cnt worth of characters from the
463 front of the exponent */
464 assert(extra_zeros_cnt >= 0);
466 /* Add one to significant_digit_cnt to copy the
467 trailing 0 byte, thus setting the length */
468 memmove(start,
469 start + extra_zeros_cnt,
470 significant_digit_cnt + 1);
472 else {
473 /* If there are fewer than 2 digits, add zeros
474 until there are 2, if there's enough room */
475 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
476 if (start + zeros + exponent_digit_cnt + 1
477 < buffer + buf_size) {
478 memmove(start + zeros, start,
479 exponent_digit_cnt + 1);
480 memset(start, '0', zeros);
486 /* Remove trailing zeros after the decimal point from a numeric string; also
487 remove the decimal point if all digits following it are zero. The numeric
488 string must end in '\0', and should not have any leading or trailing
489 whitespace. Assumes that the decimal point is '.'. */
490 Py_LOCAL_INLINE(void)
491 remove_trailing_zeros(char *buffer)
493 char *old_fraction_end, *new_fraction_end, *end, *p;
495 p = buffer;
496 if (*p == '-' || *p == '+')
497 /* Skip leading sign, if present */
498 ++p;
499 while (Py_ISDIGIT(*p))
500 ++p;
502 /* if there's no decimal point there's nothing to do */
503 if (*p++ != '.')
504 return;
506 /* scan any digits after the point */
507 while (Py_ISDIGIT(*p))
508 ++p;
509 old_fraction_end = p;
511 /* scan up to ending '\0' */
512 while (*p != '\0')
513 p++;
514 /* +1 to make sure that we move the null byte as well */
515 end = p+1;
517 /* scan back from fraction_end, looking for removable zeros */
518 p = old_fraction_end;
519 while (*(p-1) == '0')
520 --p;
521 /* and remove point if we've got that far */
522 if (*(p-1) == '.')
523 --p;
524 new_fraction_end = p;
526 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
529 /* Ensure that buffer has a decimal point in it. The decimal point will not
530 be in the current locale, it will always be '.'. Don't add a decimal point
531 if an exponent is present. Also, convert to exponential notation where
532 adding a '.0' would produce too many significant digits (see issue 5864).
534 Returns a pointer to the fixed buffer, or NULL on failure.
536 Py_LOCAL_INLINE(char *)
537 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
539 int digit_count, insert_count = 0, convert_to_exp = 0;
540 char *chars_to_insert, *digits_start;
542 /* search for the first non-digit character */
543 char *p = buffer;
544 if (*p == '-' || *p == '+')
545 /* Skip leading sign, if present. I think this could only
546 ever be '-', but it can't hurt to check for both. */
547 ++p;
548 digits_start = p;
549 while (*p && Py_ISDIGIT(*p))
550 ++p;
551 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
553 if (*p == '.') {
554 if (Py_ISDIGIT(*(p+1))) {
555 /* Nothing to do, we already have a decimal
556 point and a digit after it */
558 else {
559 /* We have a decimal point, but no following
560 digit. Insert a zero after the decimal. */
561 /* can't ever get here via PyOS_double_to_string */
562 assert(precision == -1);
563 ++p;
564 chars_to_insert = "0";
565 insert_count = 1;
568 else if (!(*p == 'e' || *p == 'E')) {
569 /* Don't add ".0" if we have an exponent. */
570 if (digit_count == precision) {
571 /* issue 5864: don't add a trailing .0 in the case
572 where the '%g'-formatted result already has as many
573 significant digits as were requested. Switch to
574 exponential notation instead. */
575 convert_to_exp = 1;
576 /* no exponent, no point, and we shouldn't land here
577 for infs and nans, so we must be at the end of the
578 string. */
579 assert(*p == '\0');
581 else {
582 assert(precision == -1 || digit_count < precision);
583 chars_to_insert = ".0";
584 insert_count = 2;
587 if (insert_count) {
588 size_t buf_len = strlen(buffer);
589 if (buf_len + insert_count + 1 >= buf_size) {
590 /* If there is not enough room in the buffer
591 for the additional text, just skip it. It's
592 not worth generating an error over. */
594 else {
595 memmove(p + insert_count, p,
596 buffer + strlen(buffer) - p + 1);
597 memcpy(p, chars_to_insert, insert_count);
600 if (convert_to_exp) {
601 int written;
602 size_t buf_avail;
603 p = digits_start;
604 /* insert decimal point */
605 assert(digit_count >= 1);
606 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
607 p[1] = '.';
608 p += digit_count+1;
609 assert(p <= buf_size+buffer);
610 buf_avail = buf_size+buffer-p;
611 if (buf_avail == 0)
612 return NULL;
613 /* Add exponent. It's okay to use lower case 'e': we only
614 arrive here as a result of using the empty format code or
615 repr/str builtins and those never want an upper case 'E' */
616 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
617 if (!(0 <= written &&
618 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
619 /* output truncated, or something else bad happened */
620 return NULL;
621 remove_trailing_zeros(buffer);
623 return buffer;
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The #gdouble to convert
 * @precision: The requested number of significant digits, or -1 if
 *             unknown; only consulted for the 'Z' format code.
 *
 * Converts a #gdouble to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure returns NULL but does not set any Python exception.
 **/
char *
_PyOS_ascii_formatd(char       *buffer,
                    size_t      buf_size,
                    const char *format,
                    double      d,
                    int         precision)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Check the leading '%' first: it also guarantees that the format
       is non-empty, so indexing the last character below is safe. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* I'm not sure why this test is here.  It's ensuring that the format
       string after the first character doesn't have a single quote, a
       lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
728 char *
729 PyOS_ascii_formatd(char *buffer,
730 size_t buf_size,
731 const char *format,
732 double d)
734 if (PyErr_WarnEx(PyExc_DeprecationWarning,
735 "PyOS_ascii_formatd is deprecated, "
736 "use PyOS_double_to_string instead", 1) < 0)
737 return NULL;
739 return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
742 PyAPI_FUNC(void)
743 _PyOS_double_to_string(char *buf, size_t buf_len, double val,
744 char format_code, int precision,
745 int flags, int *ptype)
747 char format[32];
748 int t;
749 int upper = 0;
751 if (buf_len < 1) {
752 assert(0);
753 /* There's no way to signal this error. Just return. */
754 return;
756 buf[0] = 0;
758 /* Validate format_code, and map upper and lower case */
759 switch (format_code) {
760 case 'e': /* exponent */
761 case 'f': /* fixed */
762 case 'g': /* general */
763 break;
764 case 'E':
765 upper = 1;
766 format_code = 'e';
767 break;
768 case 'F':
769 upper = 1;
770 format_code = 'f';
771 break;
772 case 'G':
773 upper = 1;
774 format_code = 'g';
775 break;
776 case 'r': /* repr format */
777 /* Supplied precision is unused, must be 0. */
778 if (precision != 0)
779 return;
780 /* The repr() precision (17 significant decimal digits) is the
781 minimal number that is guaranteed to have enough precision
782 so that if the number is read back in the exact same binary
783 value is recreated. This is true for IEEE floating point
784 by design, and also happens to work for all other modern
785 hardware. */
786 precision = 17;
787 format_code = 'g';
788 break;
789 default:
790 assert(0);
791 return;
794 /* Check for buf too small to fit "-inf". Other buffer too small
795 conditions are dealt with when converting or formatting finite
796 numbers. */
797 if (buf_len < 5) {
798 assert(0);
799 return;
802 /* Handle nan and inf. */
803 if (Py_IS_NAN(val)) {
804 strcpy(buf, "nan");
805 t = Py_DTST_NAN;
806 } else if (Py_IS_INFINITY(val)) {
807 if (copysign(1., val) == 1.)
808 strcpy(buf, "inf");
809 else
810 strcpy(buf, "-inf");
811 t = Py_DTST_INFINITE;
812 } else {
813 t = Py_DTST_FINITE;
815 /* Build the format string. */
816 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
817 (flags & Py_DTSF_ALT ? "#" : ""), precision,
818 format_code);
820 /* Have PyOS_snprintf do the hard work. */
821 PyOS_snprintf(buf, buf_len, format, val);
823 /* Do various fixups on the return string */
825 /* Get the current locale, and find the decimal point string.
826 Convert that string back to a dot. */
827 change_decimal_from_locale_to_dot(buf);
829 /* If an exponent exists, ensure that the exponent is at least
830 MIN_EXPONENT_DIGITS digits, providing the buffer is large
831 enough for the extra zeros. Also, if there are more than
832 MIN_EXPONENT_DIGITS, remove as many zeros as possible until
833 we get back to MIN_EXPONENT_DIGITS */
834 ensure_minimum_exponent_length(buf, buf_len);
836 /* Possibly make sure we have at least one character after the
837 decimal point (and make sure we have a decimal point). */
838 if (flags & Py_DTSF_ADD_DOT_0)
839 buf = ensure_decimal_point(buf, buf_len, precision);
842 /* Add the sign if asked and the result isn't negative. */
843 if (flags & Py_DTSF_SIGN && buf[0] != '-')
844 ensure_sign(buf, buf_len);
846 if (upper) {
847 /* Convert to upper case. */
848 char *p;
849 for (p = buf; *p; p++)
850 *p = Py_TOUPPER(*p);
853 if (ptype)
854 *ptype = t;
858 #ifdef PY_NO_SHORT_FLOAT_REPR
860 /* The fallback code to use if _Py_dg_dtoa is not available. */
862 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
863 char format_code,
864 int precision,
865 int flags,
866 int *type)
868 char format[32];
869 Py_ssize_t bufsize;
870 char *buf;
871 int t, exp;
872 int upper = 0;
874 /* Validate format_code, and map upper and lower case */
875 switch (format_code) {
876 case 'e': /* exponent */
877 case 'f': /* fixed */
878 case 'g': /* general */
879 break;
880 case 'E':
881 upper = 1;
882 format_code = 'e';
883 break;
884 case 'F':
885 upper = 1;
886 format_code = 'f';
887 break;
888 case 'G':
889 upper = 1;
890 format_code = 'g';
891 break;
892 case 'r': /* repr format */
893 /* Supplied precision is unused, must be 0. */
894 if (precision != 0) {
895 PyErr_BadInternalCall();
896 return NULL;
898 /* The repr() precision (17 significant decimal digits) is the
899 minimal number that is guaranteed to have enough precision
900 so that if the number is read back in the exact same binary
901 value is recreated. This is true for IEEE floating point
902 by design, and also happens to work for all other modern
903 hardware. */
904 precision = 17;
905 format_code = 'g';
906 break;
907 default:
908 PyErr_BadInternalCall();
909 return NULL;
912 /* Here's a quick-and-dirty calculation to figure out how big a buffer
913 we need. In general, for a finite float we need:
915 1 byte for each digit of the decimal significand, and
917 1 for a possible sign
918 1 for a possible decimal point
919 2 for a possible [eE][+-]
920 1 for each digit of the exponent; if we allow 19 digits
921 total then we're safe up to exponents of 2**63.
922 1 for the trailing nul byte
924 This gives a total of 24 + the number of digits in the significand,
925 and the number of digits in the significand is:
927 for 'g' format: at most precision, except possibly
928 when precision == 0, when it's 1.
929 for 'e' format: precision+1
930 for 'f' format: precision digits after the point, at least 1
931 before. To figure out how many digits appear before the point
932 we have to examine the size of the number. If fabs(val) < 1.0
933 then there will be only one digit before the point. If
934 fabs(val) >= 1.0, then there are at most
936 1+floor(log10(ceiling(fabs(val))))
938 digits before the point (where the 'ceiling' allows for the
939 possibility that the rounding rounds the integer part of val
940 up). A safe upper bound for the above quantity is
941 1+floor(exp/3), where exp is the unique integer such that 0.5
942 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
943 frexp.
945 So we allow room for precision+1 digits for all formats, plus an
946 extra floor(exp/3) digits for 'f' format.
950 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
951 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
952 bufsize = 5;
953 else {
954 bufsize = 25 + precision;
955 if (format_code == 'f' && fabs(val) >= 1.0) {
956 frexp(val, &exp);
957 bufsize += exp/3;
961 buf = PyMem_Malloc(bufsize);
962 if (buf == NULL) {
963 PyErr_NoMemory();
964 return NULL;
967 /* Handle nan and inf. */
968 if (Py_IS_NAN(val)) {
969 strcpy(buf, "nan");
970 t = Py_DTST_NAN;
971 } else if (Py_IS_INFINITY(val)) {
972 if (copysign(1., val) == 1.)
973 strcpy(buf, "inf");
974 else
975 strcpy(buf, "-inf");
976 t = Py_DTST_INFINITE;
977 } else {
978 t = Py_DTST_FINITE;
979 if (flags & Py_DTSF_ADD_DOT_0)
980 format_code = 'Z';
982 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
983 (flags & Py_DTSF_ALT ? "#" : ""), precision,
984 format_code);
985 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
988 /* Add sign when requested. It's convenient (esp. when formatting
989 complex numbers) to include a sign even for inf and nan. */
990 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
991 size_t len = strlen(buf);
992 /* the bufsize calculations above should ensure that we've got
993 space to add a sign */
994 assert((size_t)bufsize >= len+2);
995 memmove(buf+1, buf, len+1);
996 buf[0] = '+';
998 if (upper) {
999 /* Convert to upper case. */
1000 char *p1;
1001 for (p1 = buf; *p1; p1++)
1002 *p1 = Py_TOUPPER(*p1);
1005 if (type)
1006 *type = t;
1007 return buf;
1010 #else
1012 /* _Py_dg_dtoa is available. */
/* I'm using a lookup table here so that I don't have to invent a non-locale
   specific way to convert to uppercase */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* Lower- and upper-case spellings, indexed by the OFS_* constants above.
   The lengths of these are known to the code below, so don't change them */
static char *lc_float_strings[] = {
    "inf",
    "nan",
    "e",
};
static char *uc_float_strings[] = {
    "INF",
    "NAN",
    "E",
};
1033 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
1034 memory contain the resulting string.
1036 Arguments:
1037 d is the double to be converted
1038 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
1039 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
1040 mode is one of '0', '2' or '3', and is completely determined by
1041 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
1042 precision is the desired precision
1043 always_add_sign is nonzero if a '+' sign should be included for positive
1044 numbers
1045 add_dot_0_if_integer is nonzero if integers in non-exponential form
1046 should have ".0" added. Only applies to format codes 'r' and 'g'.
1047 use_alt_formatting is nonzero if alternative formatting should be
1048 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
1049 at most one of use_alt_formatting and add_dot_0_if_integer should
1050 be nonzero.
1051 type, if non-NULL, will be set to one of these constants to identify
1052 the type of the 'd' argument:
1053 Py_DTST_FINITE
1054 Py_DTST_INFINITE
1055 Py_DTST_NAN
1057 Returns a PyMem_Malloc'd block of memory containing the resulting string,
1058 or NULL on error. If NULL is returned, the Python error has been set.
1061 static char *
1062 format_float_short(double d, char format_code,
1063 int mode, Py_ssize_t precision,
1064 int always_add_sign, int add_dot_0_if_integer,
1065 int use_alt_formatting, char **float_strings, int *type)
1067 char *buf = NULL;
1068 char *p = NULL;
1069 Py_ssize_t bufsize = 0;
1070 char *digits, *digits_end;
1071 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1072 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1073 _Py_SET_53BIT_PRECISION_HEADER;
1075 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1076 Must be matched by a call to _Py_dg_freedtoa. */
1077 _Py_SET_53BIT_PRECISION_START;
1078 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1079 &digits_end);
1080 _Py_SET_53BIT_PRECISION_END;
1082 decpt = (Py_ssize_t)decpt_as_int;
1083 if (digits == NULL) {
1084 /* The only failure mode is no memory. */
1085 PyErr_NoMemory();
1086 goto exit;
1088 assert(digits_end != NULL && digits_end >= digits);
1089 digits_len = digits_end - digits;
1091 if (digits_len && !Py_ISDIGIT(digits[0])) {
1092 /* Infinities and nans here; adapt Gay's output,
1093 so convert Infinity to inf and NaN to nan, and
1094 ignore sign of nan. Then return. */
1096 /* ignore the actual sign of a nan */
1097 if (digits[0] == 'n' || digits[0] == 'N')
1098 sign = 0;
1100 /* We only need 5 bytes to hold the result "+inf\0" . */
1101 bufsize = 5; /* Used later in an assert. */
1102 buf = (char *)PyMem_Malloc(bufsize);
1103 if (buf == NULL) {
1104 PyErr_NoMemory();
1105 goto exit;
1107 p = buf;
1109 if (sign == 1) {
1110 *p++ = '-';
1112 else if (always_add_sign) {
1113 *p++ = '+';
1115 if (digits[0] == 'i' || digits[0] == 'I') {
1116 strncpy(p, float_strings[OFS_INF], 3);
1117 p += 3;
1119 if (type)
1120 *type = Py_DTST_INFINITE;
1122 else if (digits[0] == 'n' || digits[0] == 'N') {
1123 strncpy(p, float_strings[OFS_NAN], 3);
1124 p += 3;
1126 if (type)
1127 *type = Py_DTST_NAN;
1129 else {
1130 /* shouldn't get here: Gay's code should always return
1131 something starting with a digit, an 'I', or 'N' */
1132 strncpy(p, "ERR", 3);
1133 p += 3;
1134 assert(0);
1136 goto exit;
1139 /* The result must be finite (not inf or nan). */
1140 if (type)
1141 *type = Py_DTST_FINITE;
1144 /* We got digits back, format them. We may need to pad 'digits'
1145 either on the left or right (or both) with extra zeros, so in
1146 general the resulting string has the form
1148 [<sign>]<zeros><digits><zeros>[<exponent>]
1150 where either of the <zeros> pieces could be empty, and there's a
1151 decimal point that could appear either in <digits> or in the
1152 leading or trailing <zeros>.
1154 Imagine an infinite 'virtual' string vdigits, consisting of the
1155 string 'digits' (starting at index 0) padded on both the left and
1156 right with infinite strings of zeros. We want to output a slice
1158 vdigits[vdigits_start : vdigits_end]
1160 of this virtual string. Thus if vdigits_start < 0 then we'll end
1161 up producing some leading zeros; if vdigits_end > digits_len there
1162 will be trailing zeros in the output. The next section of code
1163 determines whether to use an exponent or not, figures out the
1164 position 'decpt' of the decimal point, and computes 'vdigits_start'
1165 and 'vdigits_end'. */
1166 vdigits_end = digits_len;
1167 switch (format_code) {
1168 case 'e':
1169 use_exp = 1;
1170 vdigits_end = precision;
1171 break;
1172 case 'f':
1173 vdigits_end = decpt + precision;
1174 break;
1175 case 'g':
1176 if (decpt <= -4 || decpt >
1177 (add_dot_0_if_integer ? precision-1 : precision))
1178 use_exp = 1;
1179 if (use_alt_formatting)
1180 vdigits_end = precision;
1181 break;
1182 case 'r':
1183 /* convert to exponential format at 1e16. We used to convert
1184 at 1e17, but that gives odd-looking results for some values
1185 when a 16-digit 'shortest' repr is padded with bogus zeros.
1186 For example, repr(2e16+8) would give 20000000000000010.0;
1187 the true value is 20000000000000008.0. */
1188 if (decpt <= -4 || decpt > 16)
1189 use_exp = 1;
1190 break;
1191 default:
1192 PyErr_BadInternalCall();
1193 goto exit;
1196 /* if using an exponent, reset decimal point position to 1 and adjust
1197 exponent accordingly.*/
1198 if (use_exp) {
1199 exp = decpt - 1;
1200 decpt = 1;
1202 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1203 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1204 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1205 if (!use_exp && add_dot_0_if_integer)
1206 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1207 else
1208 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1210 /* double check inequalities */
1211 assert(vdigits_start <= 0 &&
1212 0 <= digits_len &&
1213 digits_len <= vdigits_end);
1214 /* decimal point should be in (vdigits_start, vdigits_end] */
1215 assert(vdigits_start < decpt && decpt <= vdigits_end);
1217 /* Compute an upper bound how much memory we need. This might be a few
1218 chars too long, but no big deal. */
1219 bufsize =
1220 /* sign, decimal point and trailing 0 byte */
1223 /* total digit count (including zero padding on both sides) */
1224 (vdigits_end - vdigits_start) +
1226 /* exponent "e+100", max 3 numerical digits */
1227 (use_exp ? 5 : 0);
1229 /* Now allocate the memory and initialize p to point to the start of
1230 it. */
1231 buf = (char *)PyMem_Malloc(bufsize);
1232 if (buf == NULL) {
1233 PyErr_NoMemory();
1234 goto exit;
1236 p = buf;
1238 /* Add a negative sign if negative, and a plus sign if non-negative
1239 and always_add_sign is true. */
1240 if (sign == 1)
1241 *p++ = '-';
1242 else if (always_add_sign)
1243 *p++ = '+';
1245 /* note that exactly one of the three 'if' conditions is true,
1246 so we include exactly one decimal point */
1247 /* Zero padding on left of digit string */
1248 if (decpt <= 0) {
1249 memset(p, '0', decpt-vdigits_start);
1250 p += decpt - vdigits_start;
1251 *p++ = '.';
1252 memset(p, '0', 0-decpt);
1253 p += 0-decpt;
1255 else {
1256 memset(p, '0', 0-vdigits_start);
1257 p += 0 - vdigits_start;
1260 /* Digits, with included decimal point */
1261 if (0 < decpt && decpt <= digits_len) {
1262 strncpy(p, digits, decpt-0);
1263 p += decpt-0;
1264 *p++ = '.';
1265 strncpy(p, digits+decpt, digits_len-decpt);
1266 p += digits_len-decpt;
1268 else {
1269 strncpy(p, digits, digits_len);
1270 p += digits_len;
1273 /* And zeros on the right */
1274 if (digits_len < decpt) {
1275 memset(p, '0', decpt-digits_len);
1276 p += decpt-digits_len;
1277 *p++ = '.';
1278 memset(p, '0', vdigits_end-decpt);
1279 p += vdigits_end-decpt;
1281 else {
1282 memset(p, '0', vdigits_end-digits_len);
1283 p += vdigits_end-digits_len;
1286 /* Delete a trailing decimal pt unless using alternative formatting. */
1287 if (p[-1] == '.' && !use_alt_formatting)
1288 p--;
1290 /* Now that we've done zero padding, add an exponent if needed. */
1291 if (use_exp) {
1292 *p++ = float_strings[OFS_E][0];
1293 exp_len = sprintf(p, "%+.02d", exp);
1294 p += exp_len;
1296 exit:
1297 if (buf) {
1298 *p = '\0';
1299 /* It's too late if this fails, as we've already stepped on
1300 memory that isn't ours. But it's an okay debugging test. */
1301 assert(p-buf < bufsize);
1303 if (digits)
1304 _Py_dg_freedtoa(digits);
1306 return buf;
1310 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1311 char format_code,
1312 int precision,
1313 int flags,
1314 int *type)
1316 char **float_strings = lc_float_strings;
1317 int mode;
1319 /* Validate format_code, and map upper and lower case. Compute the
1320 mode and make any adjustments as needed. */
1321 switch (format_code) {
1322 /* exponent */
1323 case 'E':
1324 float_strings = uc_float_strings;
1325 format_code = 'e';
1326 /* Fall through. */
1327 case 'e':
1328 mode = 2;
1329 precision++;
1330 break;
1332 /* fixed */
1333 case 'F':
1334 float_strings = uc_float_strings;
1335 format_code = 'f';
1336 /* Fall through. */
1337 case 'f':
1338 mode = 3;
1339 break;
1341 /* general */
1342 case 'G':
1343 float_strings = uc_float_strings;
1344 format_code = 'g';
1345 /* Fall through. */
1346 case 'g':
1347 mode = 2;
1348 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1349 if (precision == 0)
1350 precision = 1;
1351 break;
1353 /* repr format */
1354 case 'r':
1355 mode = 0;
1356 /* Supplied precision is unused, must be 0. */
1357 if (precision != 0) {
1358 PyErr_BadInternalCall();
1359 return NULL;
1361 break;
1363 default:
1364 PyErr_BadInternalCall();
1365 return NULL;
1368 return format_float_short(val, format_code, mode, precision,
1369 flags & Py_DTSF_SIGN,
1370 flags & Py_DTSF_ADD_DOT_0,
1371 flags & Py_DTSF_ALT,
1372 float_strings, type);
1374 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */