Issue #5816:
[python.git] / Python / pystrtod.c
blob703ae64563f0c50d2cfdfdaeb427d2cf62c7813c
1 /* -*- Mode: C; c-file-style: "python" -*- */
3 #include <Python.h>
4 #include <locale.h>
6 /* ascii character tests (as opposed to locale tests) */
/* Both macros compare their argument against character literals only, so the
   result does not depend on the current locale.  Each macro expands its
   argument more than once; do not pass an expression with side effects
   (for example ISSPACE(*p++)). */
7 #define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
8 (c) == '\r' || (c) == '\t' || (c) == '\v')
9 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value; must not be NULL.
 * @endptr:  if non-NULL, it receives a pointer to the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno.  If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 * If memory allocation fails, ENOMEM is stored in errno.
 *
 * If the text after optional whitespace and sign does not start with a
 * digit, '.', 'i', 'I', 'n' or 'N', or starts with "0x"/"0X", or uses the
 * locale's decimal separator instead of '.', then errno is set to EINVAL,
 * *endptr (if requested) is set to @nptr and -1.0 is returned.  The ENOMEM
 * case reports failure the same way.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/

/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.
*/

double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;          /* value returned on every error path */
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725)  */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* The locale's separator is not ".": locate the '.' in the input
           so that it can be swapped for the locale's separator below. */
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            /* Loose scan over an optional exponent.  'end' only bounds
               the text that gets copied; strtod() itself decides how
               much of it is actually part of the number. */
            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699: the locale's own separator must not be
               accepted here, since the C locale would not accept it. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from the copy back into the
               caller's string.  Compare offsets, not raw pointers:
               fail_pos points into 'copy' while decimal_point_pos points
               into the caller's buffer, and relational comparison of
               pointers into different objects is undefined. */
            size_t consumed = (size_t)(fail_pos - copy);
            size_t dot_offset = (size_t)(decimal_point_pos - digits_pos);

            if (consumed > dot_offset) {
                /* strtod() went past the separator, which occupies
                   decimal_point_len bytes in the copy but a single '.'
                   in the original. */
                consumed -= decimal_point_len - 1;
            }
            fail_pos = (char *)digits_pos + consumed;
        }

        PyMem_FREE(copy);
    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the start of the whole input, not the
       position after the whitespace and sign that were skipped manually. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
/* Locale-independent counterpart of atof(): converts nptr with
   PyOS_ascii_strtod() and discards the end-of-conversion position. */
double
PyOS_ascii_atof(const char *nptr)
{
    char **no_end_wanted = NULL;

    return PyOS_ascii_strtod(nptr, no_end_wanted);
}
206 /* Given a string that may have a decimal point in the current
207 locale, change it back to a dot. Since the string cannot get
208 longer, no need for a maximum buffer size parameter. */
209 Py_LOCAL_INLINE(void)
210 change_decimal_from_locale_to_dot(char* buffer)
212 struct lconv *locale_data = localeconv();
213 const char *decimal_point = locale_data->decimal_point;
215 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
216 size_t decimal_point_len = strlen(decimal_point);
218 if (*buffer == '+' || *buffer == '-')
219 buffer++;
220 while (isdigit(Py_CHARMASK(*buffer)))
221 buffer++;
222 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
223 *buffer = '.';
224 buffer++;
225 if (decimal_point_len > 1) {
226 /* buffer needs to get smaller */
227 size_t rest_len = strlen(buffer +
228 (decimal_point_len - 1));
229 memmove(buffer,
230 buffer + (decimal_point_len - 1),
231 rest_len);
232 buffer[rest_len] = 0;
/* From the C99 standard, section 7.19.6:
   "The exponent always contains at least two digits, and only as many more
   digits as necessary to represent the exponent."
*/
243 #define MIN_EXPONENT_DIGITS 2
245 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
246 in length. */
247 Py_LOCAL_INLINE(void)
248 ensure_minumim_exponent_length(char* buffer, size_t buf_size)
250 char *p = strpbrk(buffer, "eE");
251 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
252 char *start = p + 2;
253 int exponent_digit_cnt = 0;
254 int leading_zero_cnt = 0;
255 int in_leading_zeros = 1;
256 int significant_digit_cnt;
258 /* Skip over the exponent and the sign. */
259 p += 2;
261 /* Find the end of the exponent, keeping track of leading
262 zeros. */
263 while (*p && isdigit(Py_CHARMASK(*p))) {
264 if (in_leading_zeros && *p == '0')
265 ++leading_zero_cnt;
266 if (*p != '0')
267 in_leading_zeros = 0;
268 ++p;
269 ++exponent_digit_cnt;
272 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
273 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
274 /* If there are 2 exactly digits, we're done,
275 regardless of what they contain */
277 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
278 int extra_zeros_cnt;
280 /* There are more than 2 digits in the exponent. See
281 if we can delete some of the leading zeros */
282 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
283 significant_digit_cnt = MIN_EXPONENT_DIGITS;
284 extra_zeros_cnt = exponent_digit_cnt -
285 significant_digit_cnt;
287 /* Delete extra_zeros_cnt worth of characters from the
288 front of the exponent */
289 assert(extra_zeros_cnt >= 0);
291 /* Add one to significant_digit_cnt to copy the
292 trailing 0 byte, thus setting the length */
293 memmove(start,
294 start + extra_zeros_cnt,
295 significant_digit_cnt + 1);
297 else {
298 /* If there are fewer than 2 digits, add zeros
299 until there are 2, if there's enough room */
300 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
301 if (start + zeros + exponent_digit_cnt + 1
302 < buffer + buf_size) {
303 memmove(start + zeros, start,
304 exponent_digit_cnt + 1);
305 memset(start, '0', zeros);
311 /* Ensure that buffer has a decimal point in it. The decimal point will not
312 be in the current locale, it will always be '.'. Don't add a decimal if an
313 exponent is present. */
314 Py_LOCAL_INLINE(void)
315 ensure_decimal_point(char* buffer, size_t buf_size)
317 int insert_count = 0;
318 char* chars_to_insert;
320 /* search for the first non-digit character */
321 char *p = buffer;
322 if (*p == '-' || *p == '+')
323 /* Skip leading sign, if present. I think this could only
324 ever be '-', but it can't hurt to check for both. */
325 ++p;
326 while (*p && isdigit(Py_CHARMASK(*p)))
327 ++p;
329 if (*p == '.') {
330 if (isdigit(Py_CHARMASK(*(p+1)))) {
331 /* Nothing to do, we already have a decimal
332 point and a digit after it */
334 else {
335 /* We have a decimal point, but no following
336 digit. Insert a zero after the decimal. */
337 ++p;
338 chars_to_insert = "0";
339 insert_count = 1;
342 else if (!(*p == 'e' || *p == 'E')) {
343 /* Don't add ".0" if we have an exponent. */
344 chars_to_insert = ".0";
345 insert_count = 2;
347 if (insert_count) {
348 size_t buf_len = strlen(buffer);
349 if (buf_len + insert_count + 1 >= buf_size) {
350 /* If there is not enough room in the buffer
351 for the additional text, just skip it. It's
352 not worth generating an error over. */
354 else {
355 memmove(p + insert_count, p,
356 buffer + strlen(buffer) - p + 1);
357 memcpy(p, chars_to_insert, insert_count);
362 /* see FORMATBUFLEN in unicodeobject.c */
/* Size, in bytes, of the on-stack copy of the format string that
   PyOS_ascii_formatd makes when it rewrites a 'Z' conversion as 'g'. */
363 #define FLOAT_FORMATBUFLEN 120
366 * PyOS_ascii_formatd:
367 * @buffer: A buffer to place the resulting string in
368 * @buf_size: The length of the buffer.
369 * @format: The printf()-style format to use for the
370 * code to use for converting.
371 * @d: The #gdouble to convert
373 * Converts a #gdouble to a string, using the '.' as
374 * decimal point. To format the number you pass in
375 * a printf()-style format string. Allowed conversion
376 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
378 * 'Z' is the same as 'g', except it always has a decimal and
379 * at least one digit after the decimal.
381 * Return value: The pointer to the buffer with the converted string.
383 char *
384 PyOS_ascii_formatd(char *buffer,
385 size_t buf_size,
386 const char *format,
387 double d)
389 char format_char;
390 size_t format_len = strlen(format);
392 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
393 also with at least one character past the decimal. */
394 char tmp_format[FLOAT_FORMATBUFLEN];
396 /* The last character in the format string must be the format char */
397 format_char = format[format_len - 1];
399 if (format[0] != '%')
400 return NULL;
402 /* I'm not sure why this test is here. It's ensuring that the format
403 string after the first character doesn't have a single quote, a
404 lowercase l, or a percent. This is the reverse of the commented-out
405 test about 10 lines ago. */
406 if (strpbrk(format + 1, "'l%"))
407 return NULL;
409 /* Also curious about this function is that it accepts format strings
410 like "%xg", which are invalid for floats. In general, the
411 interface to this function is not very good, but changing it is
412 difficult because it's a public API. */
414 if (!(format_char == 'e' || format_char == 'E' ||
415 format_char == 'f' || format_char == 'F' ||
416 format_char == 'g' || format_char == 'G' ||
417 format_char == 'Z'))
418 return NULL;
420 /* Map 'Z' format_char to 'g', by copying the format string and
421 replacing the final char with a 'g' */
422 if (format_char == 'Z') {
423 if (format_len + 1 >= sizeof(tmp_format)) {
424 /* The format won't fit in our copy. Error out. In
425 practice, this will never happen and will be
426 detected by returning NULL */
427 return NULL;
429 strcpy(tmp_format, format);
430 tmp_format[format_len - 1] = 'g';
431 format = tmp_format;
435 /* Have PyOS_snprintf do the hard work */
436 PyOS_snprintf(buffer, buf_size, format, d);
438 /* Do various fixups on the return string */
440 /* Get the current locale, and find the decimal point string.
441 Convert that string back to a dot. */
442 change_decimal_from_locale_to_dot(buffer);
444 /* If an exponent exists, ensure that the exponent is at least
445 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
446 for the extra zeros. Also, if there are more than
447 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
448 back to MIN_EXPONENT_DIGITS */
449 ensure_minumim_exponent_length(buffer, buf_size);
451 /* If format_char is 'Z', make sure we have at least one character
452 after the decimal point (and make sure we have a decimal point). */
453 if (format_char == 'Z')
454 ensure_decimal_point(buffer, buf_size);
456 return buffer;
459 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
460 char format_code,
461 int precision,
462 int flags,
463 int *type)
465 char buf[128];
466 char format[32];
467 Py_ssize_t len;
468 char *result;
469 char *p;
470 int t;
471 int upper = 0;
473 /* Validate format_code, and map upper and lower case */
474 switch (format_code) {
475 case 'e': /* exponent */
476 case 'f': /* fixed */
477 case 'g': /* general */
478 break;
479 case 'E':
480 upper = 1;
481 format_code = 'e';
482 break;
483 case 'F':
484 upper = 1;
485 format_code = 'f';
486 break;
487 case 'G':
488 upper = 1;
489 format_code = 'g';
490 break;
491 case 'r': /* repr format */
492 /* Supplied precision is unused, must be 0. */
493 if (precision != 0) {
494 PyErr_BadInternalCall();
495 return NULL;
497 precision = 17;
498 format_code = 'g';
499 break;
500 case 's': /* str format */
501 /* Supplied precision is unused, must be 0. */
502 if (precision != 0) {
503 PyErr_BadInternalCall();
504 return NULL;
506 precision = 12;
507 format_code = 'g';
508 break;
509 default:
510 PyErr_BadInternalCall();
511 return NULL;
514 /* Handle nan and inf. */
515 if (Py_IS_NAN(val)) {
516 strcpy(buf, "nan");
517 t = Py_DTST_NAN;
518 } else if (Py_IS_INFINITY(val)) {
519 if (copysign(1., val) == 1.)
520 strcpy(buf, "inf");
521 else
522 strcpy(buf, "-inf");
523 t = Py_DTST_INFINITE;
524 } else {
525 t = Py_DTST_FINITE;
528 if (flags & Py_DTSF_ADD_DOT_0)
529 format_code = 'Z';
531 PyOS_snprintf(format, 32, "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code);
532 PyOS_ascii_formatd(buf, sizeof(buf), format, val);
535 len = strlen(buf);
537 /* Add 1 for the trailing 0 byte.
538 Add 1 because we might need to make room for the sign.
540 result = PyMem_Malloc(len + 2);
541 if (result == NULL) {
542 PyErr_NoMemory();
543 return NULL;
545 p = result;
547 /* Add sign when requested. It's convenient (esp. when formatting
548 complex numbers) to include a sign even for inf and nan. */
549 if (flags & Py_DTSF_SIGN && buf[0] != '-')
550 *p++ = '+';
552 strcpy(p, buf);
554 if (upper) {
555 /* Convert to upper case. */
556 char *p1;
557 for (p1 = p; *p1; p1++)
558 *p1 = toupper(*p1);
561 if (type)
562 *type = t;
563 return result;