1 /* -*- Mode: C; c-file-style: "python" -*- */
/* ASCII character tests (as opposed to locale tests).

   These classify a character using fixed ASCII rules, independent of the
   current locale.  The tests are implemented as functions so that the
   argument is evaluated exactly once: the previous pure-macro versions
   expanded their argument up to six times (ISSPACE) or twice (ISDIGIT),
   which misbehaves for arguments with side effects such as *p++.  The
   ISSPACE/ISDIGIT names remain available as macros for existing callers. */

/* Return nonzero if c is one of the six ASCII whitespace characters:
   space, form feed, newline, carriage return, horizontal tab or
   vertical tab; return zero otherwise. */
static int
ascii_isspace(int c)
{
    return c == ' ' || c == '\f' || c == '\n' ||
           c == '\r' || c == '\t' || c == '\v';
}

/* Return nonzero if c is an ASCII decimal digit ('0' through '9');
   return zero otherwise. */
static int
ascii_isdigit(int c)
{
    return c >= '0' && c <= '9';
}

#define ISSPACE(c) ascii_isspace(c)
#define ISDIGIT(c) ascii_isdigit(c)
14 * @nptr: the string to convert to a numeric value.
15 * @endptr: if non-%NULL, it returns the character after
16 * the last character used in the conversion.
18 * Converts a string to a #gdouble value.
19 * This function behaves like the standard strtod() function
20 * does in the C locale. It does this without actually
21 * changing the current locale, since that would not be thread-safe.
24 * This function is typically used when reading configuration
25 * files or other non-user input that should be locale independent.
26 * To handle input from the user you should normally use the
27 * locale-sensitive system strtod() function.
29 * If the correct value would cause overflow, plus or minus %HUGE_VAL
30 * is returned (according to the sign of the value), and %ERANGE is
31 * stored in %errno. If the correct value would cause underflow,
32 * zero is returned and %ERANGE is stored in %errno.
33 * If memory allocation fails, %ENOMEM is stored in %errno.
35 * This function resets %errno before calling strtod() so that
36 * you can reliably detect overflow and underflow.
38 * Return value: the #gdouble value.
41 PyOS_ascii_strtod(const char *nptr
, char **endptr
)
45 struct lconv
*locale_data
;
46 const char *decimal_point
;
47 size_t decimal_point_len
;
48 const char *p
, *decimal_point_pos
;
49 const char *end
= NULL
; /* Silence gcc */
50 const char *digits_pos
= NULL
;
57 locale_data
= localeconv();
58 decimal_point
= locale_data
->decimal_point
;
59 decimal_point_len
= strlen(decimal_point
);
61 assert(decimal_point_len
!= 0);
63 decimal_point_pos
= NULL
;
65 /* We process any leading whitespace and the optional sign manually,
66 then pass the remainder to the system strtod. This ensures that
67 the result of an underflow has the correct sign. (bug #1725) */
70 /* Skip leading space */
74 /* Process leading sign, if present */
78 } else if (*p
== '+') {
82 /* What's left should begin with a digit, a decimal point, or one of
83 the letters i, I, n, N. It should not begin with 0x or 0X */
85 *p
!= '.' && *p
!= 'i' && *p
!= 'I' && *p
!= 'n' && *p
!= 'N')
87 (*p
== '0' && (p
[1] == 'x' || p
[1] == 'X')))
90 *endptr
= (char*)nptr
;
96 if (decimal_point
[0] != '.' ||
97 decimal_point
[1] != 0)
104 decimal_point_pos
= p
++;
109 if (*p
== 'e' || *p
== 'E')
111 if (*p
== '+' || *p
== '-')
117 else if (strncmp(p
, decimal_point
, decimal_point_len
) == 0)
119 /* Python bug #1417699 */
121 *endptr
= (char*)nptr
;
125 /* For the other cases, we need not convert the decimal
129 /* Set errno to zero, so that we can distinguish zero results
133 if (decimal_point_pos
)
137 /* We need to convert the '.' to the locale specific decimal
139 copy
= (char *)PyMem_MALLOC(end
- digits_pos
+
140 1 + decimal_point_len
);
143 *endptr
= (char *)nptr
;
149 memcpy(c
, digits_pos
, decimal_point_pos
- digits_pos
);
150 c
+= decimal_point_pos
- digits_pos
;
151 memcpy(c
, decimal_point
, decimal_point_len
);
152 c
+= decimal_point_len
;
153 memcpy(c
, decimal_point_pos
+ 1,
154 end
- (decimal_point_pos
+ 1));
155 c
+= end
- (decimal_point_pos
+ 1);
158 val
= strtod(copy
, &fail_pos
);
162 if (fail_pos
> decimal_point_pos
)
163 fail_pos
= (char *)digits_pos
+
165 (decimal_point_len
- 1);
167 fail_pos
= (char *)digits_pos
+
175 val
= strtod(digits_pos
, &fail_pos
);
178 if (fail_pos
== digits_pos
)
179 fail_pos
= (char *)nptr
;
181 if (negate
&& fail_pos
!= nptr
)
190 /* Given a string that may have a decimal point in the current
191 locale, change it back to a dot. Since the string cannot get
192 longer, no need for a maximum buffer size parameter. */
193 Py_LOCAL_INLINE(void)
194 change_decimal_from_locale_to_dot(char* buffer
)
196 struct lconv
*locale_data
= localeconv();
197 const char *decimal_point
= locale_data
->decimal_point
;
199 if (decimal_point
[0] != '.' || decimal_point
[1] != 0) {
200 size_t decimal_point_len
= strlen(decimal_point
);
202 if (*buffer
== '+' || *buffer
== '-')
204 while (isdigit(Py_CHARMASK(*buffer
)))
206 if (strncmp(buffer
, decimal_point
, decimal_point_len
) == 0) {
209 if (decimal_point_len
> 1) {
210 /* buffer needs to get smaller */
211 size_t rest_len
= strlen(buffer
+
212 (decimal_point_len
- 1));
214 buffer
+ (decimal_point_len
- 1),
216 buffer
[rest_len
] = 0;
223 /* From the C99 standard, section 7.19.6:
224 The exponent always contains at least two digits, and only as many more digits
225 as necessary to represent the exponent.
227 #define MIN_EXPONENT_DIGITS 2
229 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
231 Py_LOCAL_INLINE(void)
232 ensure_minumim_exponent_length(char* buffer
, size_t buf_size
)
234 char *p
= strpbrk(buffer
, "eE");
235 if (p
&& (*(p
+ 1) == '-' || *(p
+ 1) == '+')) {
237 int exponent_digit_cnt
= 0;
238 int leading_zero_cnt
= 0;
239 int in_leading_zeros
= 1;
240 int significant_digit_cnt
;
242 /* Skip over the exponent and the sign. */
245 /* Find the end of the exponent, keeping track of leading
247 while (*p
&& isdigit(Py_CHARMASK(*p
))) {
248 if (in_leading_zeros
&& *p
== '0')
251 in_leading_zeros
= 0;
253 ++exponent_digit_cnt
;
256 significant_digit_cnt
= exponent_digit_cnt
- leading_zero_cnt
;
257 if (exponent_digit_cnt
== MIN_EXPONENT_DIGITS
) {
258 /* If there are exactly 2 digits, we're done,
259 regardless of what they contain */
261 else if (exponent_digit_cnt
> MIN_EXPONENT_DIGITS
) {
264 /* There are more than 2 digits in the exponent. See
265 if we can delete some of the leading zeros */
266 if (significant_digit_cnt
< MIN_EXPONENT_DIGITS
)
267 significant_digit_cnt
= MIN_EXPONENT_DIGITS
;
268 extra_zeros_cnt
= exponent_digit_cnt
-
269 significant_digit_cnt
;
271 /* Delete extra_zeros_cnt worth of characters from the
272 front of the exponent */
273 assert(extra_zeros_cnt
>= 0);
275 /* Add one to significant_digit_cnt to copy the
276 trailing 0 byte, thus setting the length */
278 start
+ extra_zeros_cnt
,
279 significant_digit_cnt
+ 1);
282 /* If there are fewer than 2 digits, add zeros
283 until there are 2, if there's enough room */
284 int zeros
= MIN_EXPONENT_DIGITS
- exponent_digit_cnt
;
285 if (start
+ zeros
+ exponent_digit_cnt
+ 1
286 < buffer
+ buf_size
) {
287 memmove(start
+ zeros
, start
,
288 exponent_digit_cnt
+ 1);
289 memset(start
, '0', zeros
);
295 /* Ensure that buffer has a decimal point in it. The decimal point
296 will not be in the current locale, it will always be '.' */
297 Py_LOCAL_INLINE(void)
298 ensure_decimal_point(char* buffer
, size_t buf_size
)
300 int insert_count
= 0;
301 char* chars_to_insert
;
303 /* search for the first non-digit character */
305 while (*p
&& isdigit(Py_CHARMASK(*p
)))
309 if (isdigit(Py_CHARMASK(*(p
+1)))) {
310 /* Nothing to do, we already have a decimal
311 point and a digit after it */
314 /* We have a decimal point, but no following
315 digit. Insert a zero after the decimal. */
317 chars_to_insert
= "0";
322 chars_to_insert
= ".0";
326 size_t buf_len
= strlen(buffer
);
327 if (buf_len
+ insert_count
+ 1 >= buf_size
) {
328 /* If there is not enough room in the buffer
329 for the additional text, just skip it. It's
330 not worth generating an error over. */
333 memmove(p
+ insert_count
, p
,
334 buffer
+ strlen(buffer
) - p
+ 1);
335 memcpy(p
, chars_to_insert
, insert_count
);
340 /* Add the locale specific grouping characters to buffer. Note
341 that any decimal point (if it's present) in buffer is already
342 locale-specific. Return 0 on error, else 1. */
344 add_thousands_grouping(char* buffer
, size_t buf_size
)
346 Py_ssize_t len
= strlen(buffer
);
347 struct lconv
*locale_data
= localeconv();
348 const char *decimal_point
= locale_data
->decimal_point
;
350 /* Find the decimal point, if any. We're only concerned
351 about the characters to the left of the decimal when
353 char *p
= strstr(buffer
, decimal_point
);
355 /* No decimal, use the entire string. */
357 /* If any exponent, adjust p. */
358 p
= strpbrk(buffer
, "eE");
360 /* No exponent and no decimal. Use the entire
364 /* At this point, p points just past the right-most character we
365 want to format. We need to add the grouping string for the
366 characters between buffer and p. */
367 return _PyBytes_InsertThousandsGrouping(buffer
, len
, p
,
371 /* see FORMATBUFLEN in unicodeobject.c */
/* Size, in chars, of the tmp_format scratch array in PyOS_ascii_formatd.
   That array holds a modifiable copy of the caller's format string when
   the 'n' or 'Z' format code has to be rewritten to 'g'; the format's
   length is checked against this size before it is copied. */
372 #define FLOAT_FORMATBUFLEN 120
375 * PyOS_ascii_formatd:
376 * @buffer: A buffer to place the resulting string in
377 * @buf_size: The length of the buffer.
378 * @format: The printf()-style format string that selects
379 * the conversion to perform.
380 * @d: The #gdouble to convert
382 * Converts a #gdouble to a string, using the '.' as
383 * decimal point. To format the number you pass in
384 * a printf()-style format string. Allowed conversion
385 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n', and 'Z'.
387 * 'n' is the same as 'g', except it uses the current locale.
388 * 'Z' is the same as 'g', except it always has a decimal and
389 * at least one digit after the decimal.
391 * Return value: The pointer to the buffer with the converted string.
394 PyOS_ascii_formatd(char *buffer
,
400 size_t format_len
= strlen(format
);
402 /* For type 'n', we need to make a copy of the format string, because
403 we're going to modify 'n' -> 'g', and format is const char*, so we
404 can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
405 we ever need this to be. There's an upcoming check to ensure it's
407 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
408 also with at least one character past the decimal. */
409 char tmp_format
[FLOAT_FORMATBUFLEN
];
411 /* The last character in the format string must be the format char */
412 format_char
= format
[format_len
- 1];
414 if (format
[0] != '%')
417 /* I'm not sure why this test is here. It's ensuring that the format
418 string after the first character doesn't have a single quote, a
419 lowercase l, or a percent. This is the reverse of the commented-out
420 test about 10 lines ago. */
421 if (strpbrk(format
+ 1, "'l%"))
424 /* Also curious about this function is that it accepts format strings
425 like "%xg", which are invalid for floats. In general, the
426 interface to this function is not very good, but changing it is
427 difficult because it's a public API. */
429 if (!(format_char
== 'e' || format_char
== 'E' ||
430 format_char
== 'f' || format_char
== 'F' ||
431 format_char
== 'g' || format_char
== 'G' ||
432 format_char
== 'n' || format_char
== 'Z'))
435 /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
436 replacing the final char with a 'g' */
437 if (format_char
== 'n' || format_char
== 'Z') {
438 if (format_len
+ 1 >= sizeof(tmp_format
)) {
439 /* The format won't fit in our copy. Error out. In
440 practice, this will never happen and will be
441 detected by returning NULL */
444 strcpy(tmp_format
, format
);
445 tmp_format
[format_len
- 1] = 'g';
450 /* Have PyOS_snprintf do the hard work */
451 PyOS_snprintf(buffer
, buf_size
, format
, d
);
453 /* Do various fixups on the return string */
455 /* Get the current locale, and find the decimal point string.
456 Convert that string back to a dot. Do not do this if using the
457 'n' (number) format code, since we want to keep the localized
458 decimal point in that case. */
459 if (format_char
!= 'n')
460 change_decimal_from_locale_to_dot(buffer
);
462 /* If an exponent exists, ensure that the exponent is at least
463 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
464 for the extra zeros. Also, if there are more than
465 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
466 back to MIN_EXPONENT_DIGITS */
467 ensure_minumim_exponent_length(buffer
, buf_size
);
469 /* If format_char is 'Z', make sure we have at least one character
470 after the decimal point (and make sure we have a decimal point). */
471 if (format_char
== 'Z')
472 ensure_decimal_point(buffer
, buf_size
);
474 /* If format_char is 'n', add the thousands grouping. */
475 if (format_char
== 'n')
476 if (!add_thousands_grouping(buffer
, buf_size
))
483 PyOS_ascii_atof(const char *nptr
)
485 return PyOS_ascii_strtod(nptr
, NULL
);