Backport r71967 changes from py3k to trunk.
[python.git] / Python / pystrtod.c
blob68161644fb5add58178dc6cd1f617d2c10401072
1 /* -*- Mode: C; c-file-style: "python" -*- */
3 #include <Python.h>
4 #include <locale.h>
/* ASCII-only character classification.  Unlike the <ctype.h> functions,
   these macros never consult the current locale.  Both evaluate their
   argument more than once, so do not pass an expression with side
   effects. */
#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
#define ISSPACE(c)  ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                     (c) == '\v' || (c) == '\f' || (c) == '\r')
12 /**
13 * PyOS_ascii_strtod:
14 * @nptr: the string to convert to a numeric value.
15 * @endptr: if non-%NULL, it returns the character after
16 * the last character used in the conversion.
18 * Converts a string to a #gdouble value.
19 * This function behaves like the standard strtod() function
20 * does in the C locale. It does this without actually
21 * changing the current locale, since that would not be
22 * thread-safe.
24 * This function is typically used when reading configuration
25 * files or other non-user input that should be locale independent.
26 * To handle input from the user you should normally use the
27 * locale-sensitive system strtod() function.
29 * If the correct value would cause overflow, plus or minus %HUGE_VAL
30 * is returned (according to the sign of the value), and %ERANGE is
31 * stored in %errno. If the correct value would cause underflow,
32 * zero is returned and %ERANGE is stored in %errno.
33 * If memory allocation fails, %ENOMEM is stored in %errno.
35 * This function resets %errno before calling strtod() so that
36 * you can reliably detect overflow and underflow.
38 * Return value: the #gdouble value.
39 **/
42 Use system strtod; since strtod is locale aware, we may
43 have to first fix the decimal separator.
45 Note that unlike _Py_dg_strtod, the system strtod may not always give
46 correctly rounded results.
49 double
50 PyOS_ascii_strtod(const char *nptr, char **endptr)
52 char *fail_pos;
53 double val = -1.0;
54 struct lconv *locale_data;
55 const char *decimal_point;
56 size_t decimal_point_len;
57 const char *p, *decimal_point_pos;
58 const char *end = NULL; /* Silence gcc */
59 const char *digits_pos = NULL;
60 int negate = 0;
62 assert(nptr != NULL);
64 fail_pos = NULL;
66 locale_data = localeconv();
67 decimal_point = locale_data->decimal_point;
68 decimal_point_len = strlen(decimal_point);
70 assert(decimal_point_len != 0);
72 decimal_point_pos = NULL;
74 /* Set errno to zero, so that we can distinguish zero results
75 and underflows */
76 errno = 0;
78 /* We process any leading whitespace and the optional sign manually,
79 then pass the remainder to the system strtod. This ensures that
80 the result of an underflow has the correct sign. (bug #1725) */
82 p = nptr;
83 /* Skip leading space */
84 while (ISSPACE(*p))
85 p++;
87 /* Process leading sign, if present */
88 if (*p == '-') {
89 negate = 1;
90 p++;
92 else if (*p == '+') {
93 p++;
96 /* Parse infinities and nans */
97 if (*p == 'i' || *p == 'I') {
98 if (PyOS_strnicmp(p, "inf", 3) == 0) {
99 val = Py_HUGE_VAL;
100 if (PyOS_strnicmp(p+3, "inity", 5) == 0)
101 fail_pos = (char *)p+8;
102 else
103 fail_pos = (char *)p+3;
104 goto got_val;
106 else
107 goto invalid_string;
109 #ifdef Py_NAN
110 if (*p == 'n' || *p == 'N') {
111 if (PyOS_strnicmp(p, "nan", 3) == 0) {
112 val = Py_NAN;
113 fail_pos = (char *)p+3;
114 goto got_val;
116 else
117 goto invalid_string;
119 #endif
121 /* Some platform strtods accept hex floats; Python shouldn't (at the
122 moment), so we check explicitly for strings starting with '0x'. */
123 if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
124 goto invalid_string;
126 /* Check that what's left begins with a digit or decimal point */
127 if (!ISDIGIT(*p) && *p != '.')
128 goto invalid_string;
130 digits_pos = p;
131 if (decimal_point[0] != '.' ||
132 decimal_point[1] != 0)
134 /* Look for a '.' in the input; if present, it'll need to be
135 swapped for the current locale's decimal point before we
136 call strtod. On the other hand, if we find the current
137 locale's decimal point then the input is invalid. */
138 while (ISDIGIT(*p))
139 p++;
141 if (*p == '.')
143 decimal_point_pos = p++;
145 /* locate end of number */
146 while (ISDIGIT(*p))
147 p++;
149 if (*p == 'e' || *p == 'E')
150 p++;
151 if (*p == '+' || *p == '-')
152 p++;
153 while (ISDIGIT(*p))
154 p++;
155 end = p;
157 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
158 /* Python bug #1417699 */
159 goto invalid_string;
160 /* For the other cases, we need not convert the decimal
161 point */
164 if (decimal_point_pos) {
165 char *copy, *c;
166 /* Create a copy of the input, with the '.' converted to the
167 locale-specific decimal point */
168 copy = (char *)PyMem_MALLOC(end - digits_pos +
169 1 + decimal_point_len);
170 if (copy == NULL) {
171 if (endptr)
172 *endptr = (char *)nptr;
173 errno = ENOMEM;
174 return val;
177 c = copy;
178 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
179 c += decimal_point_pos - digits_pos;
180 memcpy(c, decimal_point, decimal_point_len);
181 c += decimal_point_len;
182 memcpy(c, decimal_point_pos + 1,
183 end - (decimal_point_pos + 1));
184 c += end - (decimal_point_pos + 1);
185 *c = 0;
187 val = strtod(copy, &fail_pos);
189 if (fail_pos)
191 if (fail_pos > decimal_point_pos)
192 fail_pos = (char *)digits_pos +
193 (fail_pos - copy) -
194 (decimal_point_len - 1);
195 else
196 fail_pos = (char *)digits_pos +
197 (fail_pos - copy);
200 PyMem_FREE(copy);
203 else {
204 val = strtod(digits_pos, &fail_pos);
207 if (fail_pos == digits_pos)
208 goto invalid_string;
210 got_val:
211 if (negate && fail_pos != nptr)
212 val = -val;
214 if (endptr)
215 *endptr = fail_pos;
217 return val;
219 invalid_string:
220 if (endptr)
221 *endptr = (char*)nptr;
222 errno = EINVAL;
223 return -1.0;
/* Locale-independent counterpart of atof(): converts nptr with
   PyOS_ascii_strtod() and discards the end-of-number position. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **unused_endptr = NULL;

    return PyOS_ascii_strtod(nptr, unused_endptr);
}
233 /* Given a string that may have a decimal point in the current
234 locale, change it back to a dot. Since the string cannot get
235 longer, no need for a maximum buffer size parameter. */
236 Py_LOCAL_INLINE(void)
237 change_decimal_from_locale_to_dot(char* buffer)
239 struct lconv *locale_data = localeconv();
240 const char *decimal_point = locale_data->decimal_point;
242 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
243 size_t decimal_point_len = strlen(decimal_point);
245 if (*buffer == '+' || *buffer == '-')
246 buffer++;
247 while (isdigit(Py_CHARMASK(*buffer)))
248 buffer++;
249 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
250 *buffer = '.';
251 buffer++;
252 if (decimal_point_len > 1) {
253 /* buffer needs to get smaller */
254 size_t rest_len = strlen(buffer +
255 (decimal_point_len - 1));
256 memmove(buffer,
257 buffer + (decimal_point_len - 1),
258 rest_len);
259 buffer[rest_len] = 0;
266 Py_LOCAL_INLINE(void)
267 ensure_sign(char* buffer, size_t buf_size)
269 Py_ssize_t len;
271 if (buffer[0] == '-')
272 /* Already have a sign. */
273 return;
275 /* Include the trailing 0 byte. */
276 len = strlen(buffer)+1;
277 if (len >= buf_size+1)
278 /* No room for the sign, don't do anything. */
279 return;
281 memmove(buffer+1, buffer, len);
282 buffer[0] = '+';
285 /* From the C99 standard, section 7.19.6:
286 The exponent always contains at least two digits, and only as many more digits
287 as necessary to represent the exponent.
289 #define MIN_EXPONENT_DIGITS 2
291 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
292 in length. */
293 Py_LOCAL_INLINE(void)
294 ensure_minumim_exponent_length(char* buffer, size_t buf_size)
296 char *p = strpbrk(buffer, "eE");
297 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
298 char *start = p + 2;
299 int exponent_digit_cnt = 0;
300 int leading_zero_cnt = 0;
301 int in_leading_zeros = 1;
302 int significant_digit_cnt;
304 /* Skip over the exponent and the sign. */
305 p += 2;
307 /* Find the end of the exponent, keeping track of leading
308 zeros. */
309 while (*p && isdigit(Py_CHARMASK(*p))) {
310 if (in_leading_zeros && *p == '0')
311 ++leading_zero_cnt;
312 if (*p != '0')
313 in_leading_zeros = 0;
314 ++p;
315 ++exponent_digit_cnt;
318 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
319 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
320 /* If there are 2 exactly digits, we're done,
321 regardless of what they contain */
323 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
324 int extra_zeros_cnt;
326 /* There are more than 2 digits in the exponent. See
327 if we can delete some of the leading zeros */
328 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
329 significant_digit_cnt = MIN_EXPONENT_DIGITS;
330 extra_zeros_cnt = exponent_digit_cnt -
331 significant_digit_cnt;
333 /* Delete extra_zeros_cnt worth of characters from the
334 front of the exponent */
335 assert(extra_zeros_cnt >= 0);
337 /* Add one to significant_digit_cnt to copy the
338 trailing 0 byte, thus setting the length */
339 memmove(start,
340 start + extra_zeros_cnt,
341 significant_digit_cnt + 1);
343 else {
344 /* If there are fewer than 2 digits, add zeros
345 until there are 2, if there's enough room */
346 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
347 if (start + zeros + exponent_digit_cnt + 1
348 < buffer + buf_size) {
349 memmove(start + zeros, start,
350 exponent_digit_cnt + 1);
351 memset(start, '0', zeros);
357 /* Ensure that buffer has a decimal point in it. The decimal point will not
358 be in the current locale, it will always be '.'. Don't add a decimal if an
359 exponent is present. */
360 Py_LOCAL_INLINE(void)
361 ensure_decimal_point(char* buffer, size_t buf_size)
363 int insert_count = 0;
364 char* chars_to_insert;
366 /* search for the first non-digit character */
367 char *p = buffer;
368 if (*p == '-' || *p == '+')
369 /* Skip leading sign, if present. I think this could only
370 ever be '-', but it can't hurt to check for both. */
371 ++p;
372 while (*p && isdigit(Py_CHARMASK(*p)))
373 ++p;
375 if (*p == '.') {
376 if (isdigit(Py_CHARMASK(*(p+1)))) {
377 /* Nothing to do, we already have a decimal
378 point and a digit after it */
380 else {
381 /* We have a decimal point, but no following
382 digit. Insert a zero after the decimal. */
383 ++p;
384 chars_to_insert = "0";
385 insert_count = 1;
388 else if (!(*p == 'e' || *p == 'E')) {
389 /* Don't add ".0" if we have an exponent. */
390 chars_to_insert = ".0";
391 insert_count = 2;
393 if (insert_count) {
394 size_t buf_len = strlen(buffer);
395 if (buf_len + insert_count + 1 >= buf_size) {
396 /* If there is not enough room in the buffer
397 for the additional text, just skip it. It's
398 not worth generating an error over. */
400 else {
401 memmove(p + insert_count, p,
402 buffer + strlen(buffer) - p + 1);
403 memcpy(p, chars_to_insert, insert_count);
408 /* see FORMATBUFLEN in unicodeobject.c */
409 #define FLOAT_FORMATBUFLEN 120
412 * _PyOS_ascii_formatd:
413 * @buffer: A buffer to place the resulting string in
414 * @buf_size: The length of the buffer.
415 * @format: The printf()-style format to use for the
416 * code to use for converting.
417 * @d: The #gdouble to convert
419 * Converts a #gdouble to a string, using the '.' as
420 * decimal point. To format the number you pass in
421 * a printf()-style format string. Allowed conversion
422 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
424 * 'Z' is the same as 'g', except it always has a decimal and
425 * at least one digit after the decimal.
427 * Return value: The pointer to the buffer with the converted string.
429 /* DEPRECATED, will be deleted in 2.8 and 3.2 */
430 PyAPI_FUNC(char *)
431 PyOS_ascii_formatd(char *buffer,
432 size_t buf_size,
433 const char *format,
434 double d)
436 char format_char;
437 size_t format_len = strlen(format);
439 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
440 also with at least one character past the decimal. */
441 char tmp_format[FLOAT_FORMATBUFLEN];
443 if (PyErr_WarnEx(PyExc_DeprecationWarning,
444 "PyOS_ascii_formatd is deprecated, "
445 "use PyOS_double_to_string instead", 1) < 0)
446 return NULL;
448 /* The last character in the format string must be the format char */
449 format_char = format[format_len - 1];
451 if (format[0] != '%')
452 return NULL;
454 /* I'm not sure why this test is here. It's ensuring that the format
455 string after the first character doesn't have a single quote, a
456 lowercase l, or a percent. This is the reverse of the commented-out
457 test about 10 lines ago. */
458 if (strpbrk(format + 1, "'l%"))
459 return NULL;
461 /* Also curious about this function is that it accepts format strings
462 like "%xg", which are invalid for floats. In general, the
463 interface to this function is not very good, but changing it is
464 difficult because it's a public API. */
466 if (!(format_char == 'e' || format_char == 'E' ||
467 format_char == 'f' || format_char == 'F' ||
468 format_char == 'g' || format_char == 'G' ||
469 format_char == 'Z'))
470 return NULL;
472 /* Map 'Z' format_char to 'g', by copying the format string and
473 replacing the final char with a 'g' */
474 if (format_char == 'Z') {
475 if (format_len + 1 >= sizeof(tmp_format)) {
476 /* The format won't fit in our copy. Error out. In
477 practice, this will never happen and will be
478 detected by returning NULL */
479 return NULL;
481 strcpy(tmp_format, format);
482 tmp_format[format_len - 1] = 'g';
483 format = tmp_format;
487 /* Have PyOS_snprintf do the hard work */
488 PyOS_snprintf(buffer, buf_size, format, d);
490 /* Do various fixups on the return string */
492 /* Get the current locale, and find the decimal point string.
493 Convert that string back to a dot. */
494 change_decimal_from_locale_to_dot(buffer);
496 /* If an exponent exists, ensure that the exponent is at least
497 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
498 for the extra zeros. Also, if there are more than
499 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
500 back to MIN_EXPONENT_DIGITS */
501 ensure_minumim_exponent_length(buffer, buf_size);
503 /* If format_char is 'Z', make sure we have at least one character
504 after the decimal point (and make sure we have a decimal point). */
505 if (format_char == 'Z')
506 ensure_decimal_point(buffer, buf_size);
508 return buffer;
511 PyAPI_FUNC(void)
512 _PyOS_double_to_string(char *buf, size_t buf_len, double val,
513 char format_code, int precision,
514 int flags, int *ptype)
516 char format[32];
517 int t;
518 int upper = 0;
520 if (buf_len < 1) {
521 assert(0);
522 /* There's no way to signal this error. Just return. */
523 return;
525 buf[0] = 0;
527 /* Validate format_code, and map upper and lower case */
528 switch (format_code) {
529 case 'e': /* exponent */
530 case 'f': /* fixed */
531 case 'g': /* general */
532 break;
533 case 'E':
534 upper = 1;
535 format_code = 'e';
536 break;
537 case 'F':
538 upper = 1;
539 format_code = 'f';
540 break;
541 case 'G':
542 upper = 1;
543 format_code = 'g';
544 break;
545 case 'r': /* repr format */
546 /* Supplied precision is unused, must be 0. */
547 if (precision != 0)
548 return;
549 precision = 17;
550 format_code = 'g';
551 break;
552 case 's': /* str format */
553 /* Supplied precision is unused, must be 0. */
554 if (precision != 0)
555 return;
556 precision = 12;
557 format_code = 'g';
558 break;
559 default:
560 assert(0);
561 return;
564 /* Check for buf too small to fit "-inf". Other buffer too small
565 conditions are dealt with when converting or formatting finite
566 numbers. */
567 if (buf_len < 5) {
568 assert(0);
569 return;
572 /* Handle nan and inf. */
573 if (Py_IS_NAN(val)) {
574 strcpy(buf, "nan");
575 t = Py_DTST_NAN;
576 } else if (Py_IS_INFINITY(val)) {
577 if (copysign(1., val) == 1.)
578 strcpy(buf, "inf");
579 else
580 strcpy(buf, "-inf");
581 t = Py_DTST_INFINITE;
582 } else {
583 t = Py_DTST_FINITE;
585 /* Build the format string. */
586 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
587 (flags & Py_DTSF_ALT ? "#" : ""), precision,
588 format_code);
590 /* Have PyOS_snprintf do the hard work. */
591 PyOS_snprintf(buf, buf_len, format, val);
593 /* Do various fixups on the return string */
595 /* Get the current locale, and find the decimal point string.
596 Convert that string back to a dot. */
597 change_decimal_from_locale_to_dot(buf);
599 /* If an exponent exists, ensure that the exponent is at least
600 MIN_EXPONENT_DIGITS digits, providing the buffer is large
601 enough for the extra zeros. Also, if there are more than
602 MIN_EXPONENT_DIGITS, remove as many zeros as possible until
603 we get back to MIN_EXPONENT_DIGITS */
604 ensure_minumim_exponent_length(buf, buf_len);
606 /* Possibly make sure we have at least one character after the
607 decimal point (and make sure we have a decimal point). */
608 if (flags & Py_DTSF_ADD_DOT_0)
609 ensure_decimal_point(buf, buf_len);
612 /* Add the sign if asked and the result isn't negative. */
613 if (flags & Py_DTSF_SIGN && buf[0] != '-')
614 ensure_sign(buf, buf_len);
616 if (upper) {
617 /* Convert to upper case. */
618 char *p;
619 for (p = buf; *p; p++)
620 *p = toupper(*p);
623 if (ptype)
624 *ptype = t;
628 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
629 char format_code,
630 int precision,
631 int flags,
632 int *ptype)
634 char buf[128];
635 Py_ssize_t len;
636 char *result;
638 _PyOS_double_to_string(buf, sizeof(buf), val, format_code, precision,
639 flags, ptype);
640 len = strlen(buf);
641 if (len == 0) {
642 PyErr_BadInternalCall();
643 return NULL;
646 /* Add 1 for the trailing 0 byte. */
647 result = PyMem_Malloc(len + 1);
648 if (result == NULL) {
649 PyErr_NoMemory();
650 return NULL;
652 strcpy(result, buf);
654 return result;