Issue #6431: Fix Fraction comparisons with unknown types, and with
[python.git] / Python / pystrtod.c
blob79f63e2603538d2f72964613eb28a4d15cf7fcf0
1 /* -*- Mode: C; c-file-style: "python" -*- */
3 #include <Python.h>
4 #include <locale.h>
6 /**
7 * PyOS_ascii_strtod:
8 * @nptr: the string to convert to a numeric value.
9 * @endptr: if non-%NULL, it returns the character after
10 * the last character used in the conversion.
12 * Converts a string to a double value.
13 * This function behaves like the standard strtod() function
14 * does in the C locale. It does this without actually
15 * changing the current locale, since that would not be
16 * thread-safe.
18 * This function is typically used when reading configuration
19 * files or other non-user input that should be locale independent.
20 * To handle input from the user you should normally use the
21 * locale-sensitive system strtod() function.
23 * If the correct value would cause overflow, plus or minus %HUGE_VAL
24 * is returned (according to the sign of the value), and %ERANGE is
25 * stored in %errno. If the correct value would cause underflow,
26 * zero is returned and %ERANGE is stored in %errno.
27 * If memory allocation fails, %ENOMEM is stored in %errno.
29 * This function resets %errno before calling strtod() so that
30 * you can reliably detect overflow and underflow.
32 * Return value: the double value.
33 **/
36 Use system strtod; since strtod is locale aware, we may
37 have to first fix the decimal separator.
39 Note that unlike _Py_dg_strtod, the system strtod may not always give
40 correctly rounded results.
/* Check whether the string s begins with the pattern t, ignoring the case
   of the characters in s.  t must already be lower-case.  Used for nan and
   inf detection.  Returns 1 when all of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* A mismatch (including hitting the end of s early) fails. */
        if (Py_TOLOWER(*s) != *t)
            return 0;
    }
    return 1;
}
56 double
57 PyOS_ascii_strtod(const char *nptr, char **endptr)
59 char *fail_pos;
60 double val = -1.0;
61 struct lconv *locale_data;
62 const char *decimal_point;
63 size_t decimal_point_len;
64 const char *p, *decimal_point_pos;
65 const char *end = NULL; /* Silence gcc */
66 const char *digits_pos = NULL;
67 int negate = 0;
69 assert(nptr != NULL);
71 fail_pos = NULL;
73 locale_data = localeconv();
74 decimal_point = locale_data->decimal_point;
75 decimal_point_len = strlen(decimal_point);
77 assert(decimal_point_len != 0);
79 decimal_point_pos = NULL;
81 /* Set errno to zero, so that we can distinguish zero results
82 and underflows */
83 errno = 0;
85 /* We process any leading whitespace and the optional sign manually,
86 then pass the remainder to the system strtod. This ensures that
87 the result of an underflow has the correct sign. (bug #1725) */
89 p = nptr;
90 /* Skip leading space */
91 while (Py_ISSPACE(*p))
92 p++;
94 /* Process leading sign, if present */
95 if (*p == '-') {
96 negate = 1;
97 p++;
99 else if (*p == '+') {
100 p++;
103 /* Parse infinities and nans */
104 if (*p == 'i' || *p == 'I') {
105 if (case_insensitive_match(p+1, "nf")) {
106 val = Py_HUGE_VAL;
107 if (case_insensitive_match(p+3, "inity"))
108 fail_pos = (char *)p+8;
109 else
110 fail_pos = (char *)p+3;
111 goto got_val;
113 else
114 goto invalid_string;
116 #ifdef Py_NAN
117 if (*p == 'n' || *p == 'N') {
118 if (case_insensitive_match(p+1, "an")) {
119 val = Py_NAN;
120 fail_pos = (char *)p+3;
121 goto got_val;
123 else
124 goto invalid_string;
126 #endif
128 /* Some platform strtods accept hex floats; Python shouldn't (at the
129 moment), so we check explicitly for strings starting with '0x'. */
130 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
131 goto invalid_string;
133 /* Check that what's left begins with a digit or decimal point */
134 if (!Py_ISDIGIT(*p) && *p != '.')
135 goto invalid_string;
137 digits_pos = p;
138 if (decimal_point[0] != '.' ||
139 decimal_point[1] != 0)
141 /* Look for a '.' in the input; if present, it'll need to be
142 swapped for the current locale's decimal point before we
143 call strtod. On the other hand, if we find the current
144 locale's decimal point then the input is invalid. */
145 while (Py_ISDIGIT(*p))
146 p++;
148 if (*p == '.')
150 decimal_point_pos = p++;
152 /* locate end of number */
153 while (Py_ISDIGIT(*p))
154 p++;
156 if (*p == 'e' || *p == 'E')
157 p++;
158 if (*p == '+' || *p == '-')
159 p++;
160 while (Py_ISDIGIT(*p))
161 p++;
162 end = p;
164 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
165 /* Python bug #1417699 */
166 goto invalid_string;
167 /* For the other cases, we need not convert the decimal
168 point */
171 if (decimal_point_pos) {
172 char *copy, *c;
173 /* Create a copy of the input, with the '.' converted to the
174 locale-specific decimal point */
175 copy = (char *)PyMem_MALLOC(end - digits_pos +
176 1 + decimal_point_len);
177 if (copy == NULL) {
178 if (endptr)
179 *endptr = (char *)nptr;
180 errno = ENOMEM;
181 return val;
184 c = copy;
185 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
186 c += decimal_point_pos - digits_pos;
187 memcpy(c, decimal_point, decimal_point_len);
188 c += decimal_point_len;
189 memcpy(c, decimal_point_pos + 1,
190 end - (decimal_point_pos + 1));
191 c += end - (decimal_point_pos + 1);
192 *c = 0;
194 val = strtod(copy, &fail_pos);
196 if (fail_pos)
198 if (fail_pos > decimal_point_pos)
199 fail_pos = (char *)digits_pos +
200 (fail_pos - copy) -
201 (decimal_point_len - 1);
202 else
203 fail_pos = (char *)digits_pos +
204 (fail_pos - copy);
207 PyMem_FREE(copy);
210 else {
211 val = strtod(digits_pos, &fail_pos);
214 if (fail_pos == digits_pos)
215 goto invalid_string;
217 got_val:
218 if (negate && fail_pos != nptr)
219 val = -val;
221 if (endptr)
222 *endptr = fail_pos;
224 return val;
226 invalid_string:
227 if (endptr)
228 *endptr = (char*)nptr;
229 errno = EINVAL;
230 return -1.0;
/* Convert nptr to a double with PyOS_ascii_strtod, without reporting where
   parsing stopped. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **no_endptr = NULL;

    return PyOS_ascii_strtod(nptr, no_endptr);
}
240 /* Given a string that may have a decimal point in the current
241 locale, change it back to a dot. Since the string cannot get
242 longer, no need for a maximum buffer size parameter. */
243 Py_LOCAL_INLINE(void)
244 change_decimal_from_locale_to_dot(char* buffer)
246 struct lconv *locale_data = localeconv();
247 const char *decimal_point = locale_data->decimal_point;
249 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
250 size_t decimal_point_len = strlen(decimal_point);
252 if (*buffer == '+' || *buffer == '-')
253 buffer++;
254 while (Py_ISDIGIT(*buffer))
255 buffer++;
256 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
257 *buffer = '.';
258 buffer++;
259 if (decimal_point_len > 1) {
260 /* buffer needs to get smaller */
261 size_t rest_len = strlen(buffer +
262 (decimal_point_len - 1));
263 memmove(buffer,
264 buffer + (decimal_point_len - 1),
265 rest_len);
266 buffer[rest_len] = 0;
273 Py_LOCAL_INLINE(void)
274 ensure_sign(char* buffer, size_t buf_size)
276 size_t len;
278 if (buffer[0] == '-')
279 /* Already have a sign. */
280 return;
282 /* Include the trailing 0 byte. */
283 len = strlen(buffer)+1;
284 if (len >= buf_size+1)
285 /* No room for the sign, don't do anything. */
286 return;
288 memmove(buffer+1, buffer, len);
289 buffer[0] = '+';
292 /* From the C99 standard, section 7.19.6:
293 The exponent always contains at least two digits, and only as many more digits
294 as necessary to represent the exponent.
296 #define MIN_EXPONENT_DIGITS 2
298 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
299 in length. */
300 Py_LOCAL_INLINE(void)
301 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
303 char *p = strpbrk(buffer, "eE");
304 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
305 char *start = p + 2;
306 int exponent_digit_cnt = 0;
307 int leading_zero_cnt = 0;
308 int in_leading_zeros = 1;
309 int significant_digit_cnt;
311 /* Skip over the exponent and the sign. */
312 p += 2;
314 /* Find the end of the exponent, keeping track of leading
315 zeros. */
316 while (*p && Py_ISDIGIT(*p)) {
317 if (in_leading_zeros && *p == '0')
318 ++leading_zero_cnt;
319 if (*p != '0')
320 in_leading_zeros = 0;
321 ++p;
322 ++exponent_digit_cnt;
325 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
326 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
327 /* If there are 2 exactly digits, we're done,
328 regardless of what they contain */
330 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
331 int extra_zeros_cnt;
333 /* There are more than 2 digits in the exponent. See
334 if we can delete some of the leading zeros */
335 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
336 significant_digit_cnt = MIN_EXPONENT_DIGITS;
337 extra_zeros_cnt = exponent_digit_cnt -
338 significant_digit_cnt;
340 /* Delete extra_zeros_cnt worth of characters from the
341 front of the exponent */
342 assert(extra_zeros_cnt >= 0);
344 /* Add one to significant_digit_cnt to copy the
345 trailing 0 byte, thus setting the length */
346 memmove(start,
347 start + extra_zeros_cnt,
348 significant_digit_cnt + 1);
350 else {
351 /* If there are fewer than 2 digits, add zeros
352 until there are 2, if there's enough room */
353 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
354 if (start + zeros + exponent_digit_cnt + 1
355 < buffer + buf_size) {
356 memmove(start + zeros, start,
357 exponent_digit_cnt + 1);
358 memset(start, '0', zeros);
364 /* Remove trailing zeros after the decimal point from a numeric string; also
365 remove the decimal point if all digits following it are zero. The numeric
366 string must end in '\0', and should not have any leading or trailing
367 whitespace. Assumes that the decimal point is '.'. */
368 Py_LOCAL_INLINE(void)
369 remove_trailing_zeros(char *buffer)
371 char *old_fraction_end, *new_fraction_end, *end, *p;
373 p = buffer;
374 if (*p == '-' || *p == '+')
375 /* Skip leading sign, if present */
376 ++p;
377 while (Py_ISDIGIT(*p))
378 ++p;
380 /* if there's no decimal point there's nothing to do */
381 if (*p++ != '.')
382 return;
384 /* scan any digits after the point */
385 while (Py_ISDIGIT(*p))
386 ++p;
387 old_fraction_end = p;
389 /* scan up to ending '\0' */
390 while (*p != '\0')
391 p++;
392 /* +1 to make sure that we move the null byte as well */
393 end = p+1;
395 /* scan back from fraction_end, looking for removable zeros */
396 p = old_fraction_end;
397 while (*(p-1) == '0')
398 --p;
399 /* and remove point if we've got that far */
400 if (*(p-1) == '.')
401 --p;
402 new_fraction_end = p;
404 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
407 /* Ensure that buffer has a decimal point in it. The decimal point will not
408 be in the current locale, it will always be '.'. Don't add a decimal point
409 if an exponent is present. Also, convert to exponential notation where
410 adding a '.0' would produce too many significant digits (see issue 5864).
412 Returns a pointer to the fixed buffer, or NULL on failure.
414 Py_LOCAL_INLINE(char *)
415 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
417 int digit_count, insert_count = 0, convert_to_exp = 0;
418 char* chars_to_insert, *digits_start;
420 /* search for the first non-digit character */
421 char *p = buffer;
422 if (*p == '-' || *p == '+')
423 /* Skip leading sign, if present. I think this could only
424 ever be '-', but it can't hurt to check for both. */
425 ++p;
426 digits_start = p;
427 while (*p && Py_ISDIGIT(*p))
428 ++p;
429 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
431 if (*p == '.') {
432 if (Py_ISDIGIT(*(p+1))) {
433 /* Nothing to do, we already have a decimal
434 point and a digit after it */
436 else {
437 /* We have a decimal point, but no following
438 digit. Insert a zero after the decimal. */
439 /* can't ever get here via PyOS_double_to_string */
440 assert(precision == -1);
441 ++p;
442 chars_to_insert = "0";
443 insert_count = 1;
446 else if (!(*p == 'e' || *p == 'E')) {
447 /* Don't add ".0" if we have an exponent. */
448 if (digit_count == precision) {
449 /* issue 5864: don't add a trailing .0 in the case
450 where the '%g'-formatted result already has as many
451 significant digits as were requested. Switch to
452 exponential notation instead. */
453 convert_to_exp = 1;
454 /* no exponent, no point, and we shouldn't land here
455 for infs and nans, so we must be at the end of the
456 string. */
457 assert(*p == '\0');
459 else {
460 assert(precision == -1 || digit_count < precision);
461 chars_to_insert = ".0";
462 insert_count = 2;
465 if (insert_count) {
466 size_t buf_len = strlen(buffer);
467 if (buf_len + insert_count + 1 >= buf_size) {
468 /* If there is not enough room in the buffer
469 for the additional text, just skip it. It's
470 not worth generating an error over. */
472 else {
473 memmove(p + insert_count, p,
474 buffer + strlen(buffer) - p + 1);
475 memcpy(p, chars_to_insert, insert_count);
478 if (convert_to_exp) {
479 int written;
480 size_t buf_avail;
481 p = digits_start;
482 /* insert decimal point */
483 assert(digit_count >= 1);
484 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
485 p[1] = '.';
486 p += digit_count+1;
487 assert(p <= buf_size+buffer);
488 buf_avail = buf_size+buffer-p;
489 if (buf_avail == 0)
490 return NULL;
491 /* Add exponent. It's okay to use lower case 'e': we only
492 arrive here as a result of using the empty format code or
493 repr/str builtins and those never want an upper case 'E' */
494 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
495 if (!(0 <= written &&
496 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
497 /* output truncated, or something else bad happened */
498 return NULL;
499 remove_trailing_zeros(buffer);
501 return buffer;
504 /* see FORMATBUFLEN in unicodeobject.c */
505 #define FLOAT_FORMATBUFLEN 120
508 * _PyOS_ascii_formatd:
509 * @buffer: A buffer to place the resulting string in
510 * @buf_size: The length of the buffer.
511 * @format: The printf()-style format to use for the
512 * code to use for converting.
513 * @d: The #gdouble to convert
515 * Converts a #gdouble to a string, using the '.' as
516 * decimal point. To format the number you pass in
517 * a printf()-style format string. Allowed conversion
518 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
520 * 'Z' is the same as 'g', except it always has a decimal and
521 * at least one digit after the decimal.
523 * Return value: The pointer to the buffer with the converted string.
524 * On failure returns NULL but does not set any Python exception.
526 /* DEPRECATED, will be deleted in 2.8 and 3.2 */
527 PyAPI_FUNC(char *)
528 PyOS_ascii_formatd(char *buffer,
529 size_t buf_size,
530 const char *format,
531 double d)
533 char format_char;
534 size_t format_len = strlen(format);
536 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
537 also with at least one character past the decimal. */
538 char tmp_format[FLOAT_FORMATBUFLEN];
540 if (PyErr_WarnEx(PyExc_DeprecationWarning,
541 "PyOS_ascii_formatd is deprecated, "
542 "use PyOS_double_to_string instead", 1) < 0)
543 return NULL;
545 /* The last character in the format string must be the format char */
546 format_char = format[format_len - 1];
548 if (format[0] != '%')
549 return NULL;
551 /* I'm not sure why this test is here. It's ensuring that the format
552 string after the first character doesn't have a single quote, a
553 lowercase l, or a percent. This is the reverse of the commented-out
554 test about 10 lines ago. */
555 if (strpbrk(format + 1, "'l%"))
556 return NULL;
558 /* Also curious about this function is that it accepts format strings
559 like "%xg", which are invalid for floats. In general, the
560 interface to this function is not very good, but changing it is
561 difficult because it's a public API. */
563 if (!(format_char == 'e' || format_char == 'E' ||
564 format_char == 'f' || format_char == 'F' ||
565 format_char == 'g' || format_char == 'G' ||
566 format_char == 'Z'))
567 return NULL;
569 /* Map 'Z' format_char to 'g', by copying the format string and
570 replacing the final char with a 'g' */
571 if (format_char == 'Z') {
572 if (format_len + 1 >= sizeof(tmp_format)) {
573 /* The format won't fit in our copy. Error out. In
574 practice, this will never happen and will be
575 detected by returning NULL */
576 return NULL;
578 strcpy(tmp_format, format);
579 tmp_format[format_len - 1] = 'g';
580 format = tmp_format;
584 /* Have PyOS_snprintf do the hard work */
585 PyOS_snprintf(buffer, buf_size, format, d);
587 /* Do various fixups on the return string */
589 /* Get the current locale, and find the decimal point string.
590 Convert that string back to a dot. */
591 change_decimal_from_locale_to_dot(buffer);
593 /* If an exponent exists, ensure that the exponent is at least
594 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
595 for the extra zeros. Also, if there are more than
596 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
597 back to MIN_EXPONENT_DIGITS */
598 ensure_minimum_exponent_length(buffer, buf_size);
600 /* If format_char is 'Z', make sure we have at least one character
601 after the decimal point (and make sure we have a decimal point);
602 also switch to exponential notation in some edge cases where the
603 extra character would produce more significant digits that we
604 really want. */
605 if (format_char == 'Z')
606 buffer = ensure_decimal_point(buffer, buf_size, -1);
608 return buffer;
611 /* Precisions used by repr() and str(), respectively.
613 The repr() precision (17 significant decimal digits) is the minimal number
614 that is guaranteed to have enough precision so that if the number is read
615 back in the exact same binary value is recreated. This is true for IEEE
616 floating point by design, and also happens to work for all other modern
617 hardware.
619 The str() precision (12 significant decimal digits) is chosen so that in
620 most cases, the rounding noise created by various operations is suppressed,
621 while giving plenty of precision for practical use.
625 PyAPI_FUNC(void)
626 _PyOS_double_to_string(char *buf, size_t buf_len, double val,
627 char format_code, int precision,
628 int flags, int *ptype)
630 char format[32];
631 int t;
632 int upper = 0;
634 if (buf_len < 1) {
635 assert(0);
636 /* There's no way to signal this error. Just return. */
637 return;
639 buf[0] = 0;
641 /* Validate format_code, and map upper and lower case */
642 switch (format_code) {
643 case 'e': /* exponent */
644 case 'f': /* fixed */
645 case 'g': /* general */
646 break;
647 case 'E':
648 upper = 1;
649 format_code = 'e';
650 break;
651 case 'F':
652 upper = 1;
653 format_code = 'f';
654 break;
655 case 'G':
656 upper = 1;
657 format_code = 'g';
658 break;
659 case 'r': /* repr format */
660 /* Supplied precision is unused, must be 0. */
661 if (precision != 0)
662 return;
663 /* The repr() precision (17 significant decimal digits) is the
664 minimal number that is guaranteed to have enough precision
665 so that if the number is read back in the exact same binary
666 value is recreated. This is true for IEEE floating point
667 by design, and also happens to work for all other modern
668 hardware. */
669 precision = 17;
670 format_code = 'g';
671 break;
672 default:
673 assert(0);
674 return;
677 /* Check for buf too small to fit "-inf". Other buffer too small
678 conditions are dealt with when converting or formatting finite
679 numbers. */
680 if (buf_len < 5) {
681 assert(0);
682 return;
685 /* Handle nan and inf. */
686 if (Py_IS_NAN(val)) {
687 strcpy(buf, "nan");
688 t = Py_DTST_NAN;
689 } else if (Py_IS_INFINITY(val)) {
690 if (copysign(1., val) == 1.)
691 strcpy(buf, "inf");
692 else
693 strcpy(buf, "-inf");
694 t = Py_DTST_INFINITE;
695 } else {
696 t = Py_DTST_FINITE;
698 /* Build the format string. */
699 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
700 (flags & Py_DTSF_ALT ? "#" : ""), precision,
701 format_code);
703 /* Have PyOS_snprintf do the hard work. */
704 PyOS_snprintf(buf, buf_len, format, val);
706 /* Do various fixups on the return string */
708 /* Get the current locale, and find the decimal point string.
709 Convert that string back to a dot. */
710 change_decimal_from_locale_to_dot(buf);
712 /* If an exponent exists, ensure that the exponent is at least
713 MIN_EXPONENT_DIGITS digits, providing the buffer is large
714 enough for the extra zeros. Also, if there are more than
715 MIN_EXPONENT_DIGITS, remove as many zeros as possible until
716 we get back to MIN_EXPONENT_DIGITS */
717 ensure_minimum_exponent_length(buf, buf_len);
719 /* Possibly make sure we have at least one character after the
720 decimal point (and make sure we have a decimal point). */
721 if (flags & Py_DTSF_ADD_DOT_0)
722 buf = ensure_decimal_point(buf, buf_len, precision);
725 /* Add the sign if asked and the result isn't negative. */
726 if (flags & Py_DTSF_SIGN && buf[0] != '-')
727 ensure_sign(buf, buf_len);
729 if (upper) {
730 /* Convert to upper case. */
731 char *p;
732 for (p = buf; *p; p++)
733 *p = Py_TOUPPER(*p);
736 if (ptype)
737 *ptype = t;
741 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
742 char format_code,
743 int precision,
744 int flags,
745 int *ptype)
747 char buf[128];
748 Py_ssize_t len;
749 char *result;
751 _PyOS_double_to_string(buf, sizeof(buf), val, format_code, precision,
752 flags, ptype);
753 len = strlen(buf);
754 if (len == 0) {
755 PyErr_BadInternalCall();
756 return NULL;
759 /* Add 1 for the trailing 0 byte. */
760 result = PyMem_Malloc(len + 1);
761 if (result == NULL) {
762 PyErr_NoMemory();
763 return NULL;
765 strcpy(result, buf);
767 return result;