Python/pystrtod.c

   1 /* -*- Mode: C; c-file-style: "python" -*- */
   2
   3 #include <Python.h>
   4 #include <locale.h>
   5
   6 /* Case-insensitive string match used for nan and inf detection; t should be
   7    lower-case.  Returns 1 for a successful match, 0 otherwise. */
   8
   9 static int
  10 case_insensitive_match(const char *s, const char *t)
  11 {
  12         while(*t && Py_TOLOWER(*s) == *t) {
  13                 s++;
  14                 t++;
  15         }
  16         return *t ? 0 : 1;
  17 }
  18
  19 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
  20    "infinity", with an optional leading sign of "+" or "-".  On success,
  21    return the NaN or Infinity as a double and set *endptr to point just beyond
  22    the successfully parsed portion of the string.  On failure, return -1.0 and
  23    set *endptr to point to the start of the string. */
  24
  25 double
  26 _Py_parse_inf_or_nan(const char *p, char **endptr)
  27 {
  28         double retval;
  29         const char *s;
  30         int negate = 0;
  31
  32         s = p;
  33         if (*s == '-') {
  34                 negate = 1;
  35                 s++;
  36         }
  37         else if (*s == '+') {
  38                 s++;
  39         }
  40         if (case_insensitive_match(s, "inf")) {
  41                 s += 3;
  42                 if (case_insensitive_match(s, "inity"))
  43                         s += 5;
  44                 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
  45         }
  46 #ifdef Py_NAN
  47         else if (case_insensitive_match(s, "nan")) {
  48                 s += 3;
  49                 retval = negate ? -Py_NAN : Py_NAN;
  50         }
  51 #endif
  52         else {
  53                 s = p;
  54                 retval = -1.0;
  55         }
  56         *endptr = (char *)s;
  57         return retval;
  58 }
  59
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
  88
  89 #ifndef PY_NO_SHORT_FLOAT_REPR
  90
  91 double
  92 _PyOS_ascii_strtod(const char *nptr, char **endptr)
  93 {
  94         double result;
  95         _Py_SET_53BIT_PRECISION_HEADER;
  96
  97         assert(nptr != NULL);
  98         /* Set errno to zero, so that we can distinguish zero results
  99            and underflows */
 100         errno = 0;
 101
 102         _Py_SET_53BIT_PRECISION_START;
 103         result = _Py_dg_strtod(nptr, endptr);
 104         _Py_SET_53BIT_PRECISION_END;
 105
 106         if (*endptr == nptr)
 107                 /* string might represent and inf or nan */
 108                 result = _Py_parse_inf_or_nan(nptr, endptr);
 109
 110         return result;
 111
 112 }
 113
 114 #else
 115
 116 /*
 117    Use system strtod;  since strtod is locale aware, we may
 118    have to first fix the decimal separator.
 119
 120    Note that unlike _Py_dg_strtod, the system strtod may not always give
 121    correctly rounded results.
 122 */
 123
 124 double
 125 _PyOS_ascii_strtod(const char *nptr, char **endptr)
 126 {
 127         char *fail_pos;
 128         double val = -1.0;
 129         struct lconv *locale_data;
 130         const char *decimal_point;
 131         size_t decimal_point_len;
 132         const char *p, *decimal_point_pos;
 133         const char *end = NULL; /* Silence gcc */
 134         const char *digits_pos = NULL;
 135         int negate = 0;
 136
 137         assert(nptr != NULL);
 138
 139         fail_pos = NULL;
 140
 141         locale_data = localeconv();
 142         decimal_point = locale_data->decimal_point;
 143         decimal_point_len = strlen(decimal_point);
 144
 145         assert(decimal_point_len != 0);
 146
 147         decimal_point_pos = NULL;
 148
 149         /* Parse infinities and nans */
 150         val = _Py_parse_inf_or_nan(nptr, endptr);
 151         if (*endptr != nptr)
 152                 return val;
 153
 154         /* Set errno to zero, so that we can distinguish zero results
 155            and underflows */
 156         errno = 0;
 157
 158         /* We process the optional sign manually, then pass the remainder to
 159            the system strtod.  This ensures that the result of an underflow
 160            has the correct sign. (bug #1725)  */
 161         p = nptr;
 162         /* Process leading sign, if present */
 163         if (*p == '-') {
 164                 negate = 1;
 165                 p++;
 166         }
 167         else if (*p == '+') {
 168                 p++;
 169         }
 170
 171         /* Some platform strtods accept hex floats; Python shouldn't (at the
 172            moment), so we check explicitly for strings starting with '0x'. */
 173         if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
 174                 goto invalid_string;
 175
 176         /* Check that what's left begins with a digit or decimal point */
 177         if (!Py_ISDIGIT(*p) && *p != '.')
 178                 goto invalid_string;
 179
 180         digits_pos = p;
 181         if (decimal_point[0] != '.' ||
 182             decimal_point[1] != 0)
 183         {
 184                 /* Look for a '.' in the input; if present, it'll need to be
 185                    swapped for the current locale's decimal point before we
 186                    call strtod.  On the other hand, if we find the current
 187                    locale's decimal point then the input is invalid. */
 188                 while (Py_ISDIGIT(*p))
 189                         p++;
 190
 191                 if (*p == '.')
 192                 {
 193                         decimal_point_pos = p++;
 194
 195                         /* locate end of number */
 196                         while (Py_ISDIGIT(*p))
 197                                 p++;
 198
 199                         if (*p == 'e' || *p == 'E')
 200                                 p++;
 201                         if (*p == '+' || *p == '-')
 202                                 p++;
 203                         while (Py_ISDIGIT(*p))
 204                                 p++;
 205                         end = p;
 206                 }
 207                 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
 208                         /* Python bug #1417699 */
 209                         goto invalid_string;
 210                 /* For the other cases, we need not convert the decimal
 211                    point */
 212         }
 213
 214         if (decimal_point_pos) {
 215                 char *copy, *c;
 216                 /* Create a copy of the input, with the '.' converted to the
 217                    locale-specific decimal point */
 218                 copy = (char *)PyMem_MALLOC(end - digits_pos +
 219                                             1 + decimal_point_len);
 220                 if (copy == NULL) {
 221                         *endptr = (char *)nptr;
 222                         errno = ENOMEM;
 223                         return val;
 224                 }
 225
 226                 c = copy;
 227                 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
 228                 c += decimal_point_pos - digits_pos;
 229                 memcpy(c, decimal_point, decimal_point_len);
 230                 c += decimal_point_len;
 231                 memcpy(c, decimal_point_pos + 1,
 232                        end - (decimal_point_pos + 1));
 233                 c += end - (decimal_point_pos + 1);
 234                 *c = 0;
 235
 236                 val = strtod(copy, &fail_pos);
 237
 238                 if (fail_pos)
 239                 {
 240                         if (fail_pos > decimal_point_pos)
 241                                 fail_pos = (char *)digits_pos +
 242                                         (fail_pos - copy) -
 243                                         (decimal_point_len - 1);
 244                         else
 245                                 fail_pos = (char *)digits_pos +
 246                                         (fail_pos - copy);
 247                 }
 248
 249                 PyMem_FREE(copy);
 250
 251         }
 252         else {
 253                 val = strtod(digits_pos, &fail_pos);
 254         }
 255
 256         if (fail_pos == digits_pos)
 257                 goto invalid_string;
 258
 259         if (negate && fail_pos != nptr)
 260                 val = -val;
 261         *endptr = fail_pos;
 262
 263         return val;
 264
 265   invalid_string:
 266         *endptr = (char*)nptr;
 267         errno = EINVAL;
 268         return -1.0;
 269 }
 270
 271 #endif
 272
 273 double
 274 PyOS_ascii_strtod(const char *nptr, char **endptr)
 275 {
 276         char *fail_pos;
 277         const char *p;
 278         double x;
 279
 280         /* _PyOS_ascii_strtod already does everything that we want,
 281            except that it doesn't parse leading whitespace */
 282         p = nptr;
 283         while (Py_ISSPACE(*p))
 284                 p++;
 285         x = _PyOS_ascii_strtod(p, &fail_pos);
 286         if (fail_pos == p)
 287                 fail_pos = (char *)nptr;
 288         if (endptr)
 289                 *endptr = (char *)fail_pos;
 290         return x;
 291 }
 292
 293 double
 294 PyOS_ascii_atof(const char *nptr)
 295 {
 296         return PyOS_ascii_strtod(nptr, NULL);
 297 }
 298
 299 /* PyOS_string_to_double is the recommended replacement for the
 300    PyOS_ascii_strtod and PyOS_ascii_atof functions.  It converts a
 301    null-terminated byte string s (interpreted as a string of ASCII characters)
 302    to a float.  The string should not have leading or trailing whitespace (in
 303    contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
 304    whitespace).  The conversion is independent of the current locale.
 305
 306    If endptr is NULL, try to convert the whole string.  Raise ValueError and
 307    return -1.0 if the string is not a valid representation of a floating-point
 308    number.
 309
 310    If endptr is non-NULL, try to convert as much of the string as possible.
 311    If no initial segment of the string is the valid representation of a
 312    floating-point number then *endptr is set to point to the beginning of the
 313    string, -1.0 is returned and again ValueError is raised.
 314
 315    On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
 316    if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
 317    exception is raised.  Otherwise, overflow_exception should point to a
 318    a Python exception, this exception will be raised, -1.0 will be returned,
 319    and *endptr will point just past the end of the converted value.
 320
 321    If any other failure occurs (for example lack of memory), -1.0 is returned
 322    and the appropriate Python exception will have been set.
 323 */
 324
 325 double
 326 PyOS_string_to_double(const char *s,
 327                       char **endptr,
 328                       PyObject *overflow_exception)
 329 {
 330         double x, result=-1.0;
 331         char *fail_pos;
 332
 333         errno = 0;
 334         PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
 335         x = PyOS_ascii_strtod(s, &fail_pos);
 336         PyFPE_END_PROTECT(x)
 337
 338         if (errno == ENOMEM) {
 339                 PyErr_NoMemory();
 340                 fail_pos = (char *)s;
 341         }
 342         else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
 343                 PyErr_Format(PyExc_ValueError,
 344                               "could not convert string to float: "
 345                               "%.200s", s);
 346         else if (fail_pos == s)
 347                 PyErr_Format(PyExc_ValueError,
 348                               "could not convert string to float: "
 349                               "%.200s", s);
 350         else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
 351                 PyErr_Format(overflow_exception,
 352                               "value too large to convert to float: "
 353                               "%.200s", s);
 354         else
 355                 result = x;
 356
 357         if (endptr != NULL)
 358                 *endptr = fail_pos;
 359         return result;
 360 }
 361
 362 /* Given a string that may have a decimal point in the current
 363    locale, change it back to a dot.  Since the string cannot get
 364    longer, no need for a maximum buffer size parameter. */
 365 Py_LOCAL_INLINE(void)
 366 change_decimal_from_locale_to_dot(char* buffer)
 367 {
 368         struct lconv *locale_data = localeconv();
 369         const char *decimal_point = locale_data->decimal_point;
 370
 371         if (decimal_point[0] != '.' || decimal_point[1] != 0) {
 372                 size_t decimal_point_len = strlen(decimal_point);
 373
 374                 if (*buffer == '+' || *buffer == '-')
 375                         buffer++;
 376                 while (Py_ISDIGIT(*buffer))
 377                         buffer++;
 378                 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
 379                         *buffer = '.';
 380                         buffer++;
 381                         if (decimal_point_len > 1) {
 382                                 /* buffer needs to get smaller */
 383                                 size_t rest_len = strlen(buffer +
 384                                                      (decimal_point_len - 1));
 385                                 memmove(buffer,
 386                                         buffer + (decimal_point_len - 1),
 387                                         rest_len);
 388                                 buffer[rest_len] = 0;
 389                         }
 390                 }
 391         }
 392 }
 393
 394
 395 /* From the C99 standard, section 7.19.6:
 396 The exponent always contains at least two digits, and only as many more digits
 397 as necessary to represent the exponent.
 398 */
 399 #define MIN_EXPONENT_DIGITS 2
 400
 401 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
 402    in length. */
 403 Py_LOCAL_INLINE(void)
 404 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
 405 {
 406         char *p = strpbrk(buffer, "eE");
 407         if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
 408                 char *start = p + 2;
 409                 int exponent_digit_cnt = 0;
 410                 int leading_zero_cnt = 0;
 411                 int in_leading_zeros = 1;
 412                 int significant_digit_cnt;
 413
 414                 /* Skip over the exponent and the sign. */
 415                 p += 2;
 416
 417                 /* Find the end of the exponent, keeping track of leading
 418                    zeros. */
 419                 while (*p && Py_ISDIGIT(*p)) {
 420                         if (in_leading_zeros && *p == '0')
 421                                 ++leading_zero_cnt;
 422                         if (*p != '0')
 423                                 in_leading_zeros = 0;
 424                         ++p;
 425                         ++exponent_digit_cnt;
 426                 }
 427
 428                 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
 429                 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
 430                         /* If there are 2 exactly digits, we're done,
 431                            regardless of what they contain */
 432                 }
 433                 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
 434                         int extra_zeros_cnt;
 435
 436                         /* There are more than 2 digits in the exponent.  See
 437                            if we can delete some of the leading zeros */
 438                         if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
 439                                 significant_digit_cnt = MIN_EXPONENT_DIGITS;
 440                         extra_zeros_cnt = exponent_digit_cnt -
 441                                 significant_digit_cnt;
 442
 443                         /* Delete extra_zeros_cnt worth of characters from the
 444                            front of the exponent */
 445                         assert(extra_zeros_cnt >= 0);
 446
 447                         /* Add one to significant_digit_cnt to copy the
 448                            trailing 0 byte, thus setting the length */
 449                         memmove(start,
 450                                 start + extra_zeros_cnt,
 451                                 significant_digit_cnt + 1);
 452                 }
 453                 else {
 454                         /* If there are fewer than 2 digits, add zeros
 455                            until there are 2, if there's enough room */
 456                         int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
 457                         if (start + zeros + exponent_digit_cnt + 1
 458                               < buffer + buf_size) {
 459                                 memmove(start + zeros, start,
 460                                         exponent_digit_cnt + 1);
 461                                 memset(start, '0', zeros);
 462                         }
 463                 }
 464         }
 465 }
 466
 467 /* Remove trailing zeros after the decimal point from a numeric string; also
 468    remove the decimal point if all digits following it are zero.  The numeric
 469    string must end in '\0', and should not have any leading or trailing
 470    whitespace.  Assumes that the decimal point is '.'. */
 471 Py_LOCAL_INLINE(void)
 472 remove_trailing_zeros(char *buffer)
 473 {
 474         char *old_fraction_end, *new_fraction_end, *end, *p;
 475
 476         p = buffer;
 477         if (*p == '-' || *p == '+')
 478                 /* Skip leading sign, if present */
 479                 ++p;
 480         while (Py_ISDIGIT(*p))
 481                 ++p;
 482
 483         /* if there's no decimal point there's nothing to do */
 484         if (*p++ != '.')
 485                 return;
 486
 487         /* scan any digits after the point */
 488         while (Py_ISDIGIT(*p))
 489                 ++p;
 490         old_fraction_end = p;
 491
 492         /* scan up to ending '\0' */
 493         while (*p != '\0')
 494                 p++;
 495         /* +1 to make sure that we move the null byte as well */
 496         end = p+1;
 497
 498         /* scan back from fraction_end, looking for removable zeros */
 499         p = old_fraction_end;
 500         while (*(p-1) == '0')
 501                 --p;
 502         /* and remove point if we've got that far */
 503         if (*(p-1) == '.')
 504                 --p;
 505         new_fraction_end = p;
 506
 507         memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
 508 }
 509
 510 /* Ensure that buffer has a decimal point in it.  The decimal point will not
 511    be in the current locale, it will always be '.'. Don't add a decimal point
 512    if an exponent is present.  Also, convert to exponential notation where
 513    adding a '.0' would produce too many significant digits (see issue 5864).
 514
 515    Returns a pointer to the fixed buffer, or NULL on failure.
 516 */
 517 Py_LOCAL_INLINE(char *)
 518 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
 519 {
 520         int digit_count, insert_count = 0, convert_to_exp = 0;
 521         char *chars_to_insert, *digits_start;
 522
 523         /* search for the first non-digit character */
 524         char *p = buffer;
 525         if (*p == '-' || *p == '+')
 526                 /* Skip leading sign, if present.  I think this could only
 527                    ever be '-', but it can't hurt to check for both. */
 528                 ++p;
 529         digits_start = p;
 530         while (*p && Py_ISDIGIT(*p))
 531                 ++p;
 532         digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
 533
 534         if (*p == '.') {
 535                 if (Py_ISDIGIT(*(p+1))) {
 536                         /* Nothing to do, we already have a decimal
 537                            point and a digit after it */
 538                 }
 539                 else {
 540                         /* We have a decimal point, but no following
 541                            digit.  Insert a zero after the decimal. */
 542                         /* can't ever get here via PyOS_double_to_string */
 543                         assert(precision == -1);
 544                         ++p;
 545                         chars_to_insert = "0";
 546                         insert_count = 1;
 547                 }
 548         }
 549         else if (!(*p == 'e' || *p == 'E')) {
 550                 /* Don't add ".0" if we have an exponent. */
 551                 if (digit_count == precision) {
 552                         /* issue 5864: don't add a trailing .0 in the case
 553                            where the '%g'-formatted result already has as many
 554                            significant digits as were requested.  Switch to
 555                            exponential notation instead. */
 556                         convert_to_exp = 1;
 557                         /* no exponent, no point, and we shouldn't land here
 558                            for infs and nans, so we must be at the end of the
 559                            string. */
 560                         assert(*p == '\0');
 561                 }
 562                 else {
 563                         assert(precision == -1 || digit_count < precision);
 564                         chars_to_insert = ".0";
 565                         insert_count = 2;
 566                 }
 567         }
 568         if (insert_count) {
 569                 size_t buf_len = strlen(buffer);
 570                 if (buf_len + insert_count + 1 >= buf_size) {
 571                         /* If there is not enough room in the buffer
 572                            for the additional text, just skip it.  It's
 573                            not worth generating an error over. */
 574                 }
 575                 else {
 576                         memmove(p + insert_count, p,
 577                                 buffer + strlen(buffer) - p + 1);
 578                         memcpy(p, chars_to_insert, insert_count);
 579                 }
 580         }
 581         if (convert_to_exp) {
 582                 int written;
 583                 size_t buf_avail;
 584                 p = digits_start;
 585                 /* insert decimal point */
 586                 assert(digit_count >= 1);
 587                 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
 588                 p[1] = '.';
 589                 p += digit_count+1;
 590                 assert(p <= buf_size+buffer);
 591                 buf_avail = buf_size+buffer-p;
 592                 if (buf_avail == 0)
 593                         return NULL;
 594                 /* Add exponent.  It's okay to use lower case 'e': we only
 595                    arrive here as a result of using the empty format code or
 596                    repr/str builtins and those never want an upper case 'E' */
 597                 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
 598                 if (!(0 <= written &&
 599                       written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
 600                         /* output truncated, or something else bad happened */
 601                         return NULL;
 602                 remove_trailing_zeros(buffer);
 603         }
 604         return buffer;
 605 }
 606
 607 /* see FORMATBUFLEN in unicodeobject.c */
 608 #define FLOAT_FORMATBUFLEN 120
 609
 610 /**
 611  * PyOS_ascii_formatd:
 612  * @buffer: A buffer to place the resulting string in
 613  * @buf_size: The length of the buffer.
 614  * @format: The printf()-style format to use for the
 615  *          code to use for converting.
 616  * @d: The #gdouble to convert
 617  *
 618  * Converts a #gdouble to a string, using the '.' as
 619  * decimal point. To format the number you pass in
 620  * a printf()-style format string. Allowed conversion
 621  * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 622  *
 623  * 'Z' is the same as 'g', except it always has a decimal and
 624  *     at least one digit after the decimal.
 625  *
 626  * Return value: The pointer to the buffer with the converted string.
 627  * On failure returns NULL but does not set any Python exception.
 628  **/
 629 char *
 630 _PyOS_ascii_formatd(char       *buffer,
 631                    size_t      buf_size,
 632                    const char *format,
 633                    double      d,
 634                    int         precision)
 635 {
 636         char format_char;
 637         size_t format_len = strlen(format);
 638
 639         /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
 640            also with at least one character past the decimal. */
 641         char tmp_format[FLOAT_FORMATBUFLEN];
 642
 643         /* The last character in the format string must be the format char */
 644         format_char = format[format_len - 1];
 645
 646         if (format[0] != '%')
 647                 return NULL;
 648
 649         /* I'm not sure why this test is here.  It's ensuring that the format
 650            string after the first character doesn't have a single quote, a
 651            lowercase l, or a percent. This is the reverse of the commented-out
 652            test about 10 lines ago. */
 653         if (strpbrk(format + 1, "'l%"))
 654                 return NULL;
 655
 656         /* Also curious about this function is that it accepts format strings
 657            like "%xg", which are invalid for floats.  In general, the
 658            interface to this function is not very good, but changing it is
 659            difficult because it's a public API. */
 660
 661         if (!(format_char == 'e' || format_char == 'E' ||
 662               format_char == 'f' || format_char == 'F' ||
 663               format_char == 'g' || format_char == 'G' ||
 664               format_char == 'Z'))
 665                 return NULL;
 666
 667         /* Map 'Z' format_char to 'g', by copying the format string and
 668            replacing the final char with a 'g' */
 669         if (format_char == 'Z') {
 670                 if (format_len + 1 >= sizeof(tmp_format)) {
 671                         /* The format won't fit in our copy.  Error out.  In
 672                            practice, this will never happen and will be
 673                            detected by returning NULL */
 674                         return NULL;
 675                 }
 676                 strcpy(tmp_format, format);
 677                 tmp_format[format_len - 1] = 'g';
 678                 format = tmp_format;
 679         }
 680
 681
 682         /* Have PyOS_snprintf do the hard work */
 683         PyOS_snprintf(buffer, buf_size, format, d);
 684
 685         /* Do various fixups on the return string */
 686
 687         /* Get the current locale, and find the decimal point string.
 688            Convert that string back to a dot. */
 689         change_decimal_from_locale_to_dot(buffer);
 690
 691         /* If an exponent exists, ensure that the exponent is at least
 692            MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
 693            for the extra zeros.  Also, if there are more than
 694            MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
 695            back to MIN_EXPONENT_DIGITS */
 696         ensure_minimum_exponent_length(buffer, buf_size);
 697
 698         /* If format_char is 'Z', make sure we have at least one character
 699            after the decimal point (and make sure we have a decimal point);
 700            also switch to exponential notation in some edge cases where the
 701            extra character would produce more significant digits that we
 702            really want. */
 703         if (format_char == 'Z')
 704                 buffer = ensure_decimal_point(buffer, buf_size, precision);
 705
 706         return buffer;
 707 }
 708
 709 char *
 710 PyOS_ascii_formatd(char       *buffer,
 711                    size_t      buf_size,
 712                    const char *format,
 713                    double      d)
 714 {
 715         if (PyErr_WarnEx(PyExc_DeprecationWarning,
 716                          "PyOS_ascii_formatd is deprecated, "
 717                          "use PyOS_double_to_string instead", 1) < 0)
 718                 return NULL;
 719
 720         return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
 721 }
 722
 723 #ifdef PY_NO_SHORT_FLOAT_REPR
 724
 725 /* The fallback code to use if _Py_dg_dtoa is not available. */
 726
 727 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
 728                                          char format_code,
 729                                          int precision,
 730                                          int flags,
 731                                          int *type)
 732 {
 733         char format[32];
 734         Py_ssize_t bufsize;
 735         char *buf;
 736         int t, exp;
 737         int upper = 0;
 738
 739         /* Validate format_code, and map upper and lower case */
 740         switch (format_code) {
 741         case 'e':          /* exponent */
 742         case 'f':          /* fixed */
 743         case 'g':          /* general */
 744                 break;
 745         case 'E':
 746                 upper = 1;
 747                 format_code = 'e';
 748                 break;
 749         case 'F':
 750                 upper = 1;
 751                 format_code = 'f';
 752                 break;
 753         case 'G':
 754                 upper = 1;
 755                 format_code = 'g';
 756                 break;
 757         case 'r':          /* repr format */
 758                 /* Supplied precision is unused, must be 0. */
 759                 if (precision != 0) {
 760                         PyErr_BadInternalCall();
 761                         return NULL;
 762                 }
 763                 /* The repr() precision (17 significant decimal digits) is the
 764                    minimal number that is guaranteed to have enough precision
 765                    so that if the number is read back in the exact same binary
 766                    value is recreated.  This is true for IEEE floating point
 767                    by design, and also happens to work for all other modern
 768                    hardware. */
 769                 precision = 17;
 770                 format_code = 'g';
 771                 break;
 772         default:
 773                 PyErr_BadInternalCall();
 774                 return NULL;
 775         }
 776
 777         /* Here's a quick-and-dirty calculation to figure out how big a buffer
 778            we need.  In general, for a finite float we need:
 779
 780              1 byte for each digit of the decimal significand, and
 781
 782              1 for a possible sign
 783              1 for a possible decimal point
 784              2 for a possible [eE][+-]
 785              1 for each digit of the exponent;  if we allow 19 digits
 786                total then we're safe up to exponents of 2**63.
 787              1 for the trailing nul byte
 788
 789            This gives a total of 24 + the number of digits in the significand,
 790            and the number of digits in the significand is:
 791
 792              for 'g' format: at most precision, except possibly
 793                when precision == 0, when it's 1.
 794              for 'e' format: precision+1
 795              for 'f' format: precision digits after the point, at least 1
 796                before.  To figure out how many digits appear before the point
 797                we have to examine the size of the number.  If fabs(val) < 1.0
 798                then there will be only one digit before the point.  If
 799                fabs(val) >= 1.0, then there are at most
 800
 801                  1+floor(log10(ceiling(fabs(val))))
 802
 803                digits before the point (where the 'ceiling' allows for the
 804                possibility that the rounding rounds the integer part of val
 805                up).  A safe upper bound for the above quantity is
 806                1+floor(exp/3), where exp is the unique integer such that 0.5
 807                <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
 808                frexp.
 809
 810            So we allow room for precision+1 digits for all formats, plus an
 811            extra floor(exp/3) digits for 'f' format.
 812
 813         */
 814
 815         if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
 816                 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
 817                 bufsize = 5;
 818         else {
 819                 bufsize = 25 + precision;
 820                 if (format_code == 'f' && fabs(val) >= 1.0) {
 821                         frexp(val, &exp);
 822                         bufsize += exp/3;
 823                 }
 824         }
 825
 826         buf = PyMem_Malloc(bufsize);
 827         if (buf == NULL) {
 828                 PyErr_NoMemory();
 829                 return NULL;
 830         }
 831
 832         /* Handle nan and inf. */
 833         if (Py_IS_NAN(val)) {
 834                 strcpy(buf, "nan");
 835                 t = Py_DTST_NAN;
 836         } else if (Py_IS_INFINITY(val)) {
 837                 if (copysign(1., val) == 1.)
 838                         strcpy(buf, "inf");
 839                 else
 840                         strcpy(buf, "-inf");
 841                 t = Py_DTST_INFINITE;
 842         } else {
 843                 t = Py_DTST_FINITE;
 844                 if (flags & Py_DTSF_ADD_DOT_0)
 845                         format_code = 'Z';
 846
 847                 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
 848                               (flags & Py_DTSF_ALT ? "#" : ""), precision,
 849                               format_code);
 850                 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
 851         }
 852
 853         /* Add sign when requested.  It's convenient (esp. when formatting
 854          complex numbers) to include a sign even for inf and nan. */
 855         if (flags & Py_DTSF_SIGN && buf[0] != '-') {
 856                 size_t len = strlen(buf);
 857                 /* the bufsize calculations above should ensure that we've got
 858                    space to add a sign */
 859                 assert((size_t)bufsize >= len+2);
 860                 memmove(buf+1, buf, len+1);
 861                 buf[0] = '+';
 862         }
 863         if (upper) {
 864                 /* Convert to upper case. */
 865                 char *p1;
 866                 for (p1 = buf; *p1; p1++)
 867                         *p1 = Py_TOUPPER(*p1);
 868         }
 869
 870         if (type)
 871                 *type = t;
 872         return buf;
 873 }
 874
 875 #else
 876
 877 /* _Py_dg_dtoa is available. */
 878
 879 /* I'm using a lookup table here so that I don't have to invent a non-locale
 880    specific way to convert to uppercase */
 881 #define OFS_INF 0
 882 #define OFS_NAN 1
 883 #define OFS_E 2
 884
 885 /* The lengths of these are known to the code below, so don't change them */
 886 static char *lc_float_strings[] = {
 887         "inf",
 888         "nan",
 889         "e",
 890 };
 891 static char *uc_float_strings[] = {
 892         "INF",
 893         "NAN",
 894         "E",
 895 };
 896
 897
 898 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
 899    memory contain the resulting string.
 900
 901    Arguments:
 902      d is the double to be converted
 903      format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
 904        correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
 905      mode is one of '0', '2' or '3', and is completely determined by
 906        format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
 907      precision is the desired precision
 908      always_add_sign is nonzero if a '+' sign should be included for positive
 909        numbers
 910      add_dot_0_if_integer is nonzero if integers in non-exponential form
 911        should have ".0" added.  Only applies to format codes 'r' and 'g'.
 912      use_alt_formatting is nonzero if alternative formatting should be
 913        used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
 914        at most one of use_alt_formatting and add_dot_0_if_integer should
 915        be nonzero.
 916      type, if non-NULL, will be set to one of these constants to identify
 917        the type of the 'd' argument:
 918          Py_DTST_FINITE
 919          Py_DTST_INFINITE
 920          Py_DTST_NAN
 921
 922    Returns a PyMem_Malloc'd block of memory containing the resulting string,
 923     or NULL on error. If NULL is returned, the Python error has been set.
 924  */
 925
 926 static char *
 927 format_float_short(double d, char format_code,
 928                    int mode, Py_ssize_t precision,
 929                    int always_add_sign, int add_dot_0_if_integer,
 930                    int use_alt_formatting, char **float_strings, int *type)
 931 {
 932         char *buf = NULL;
 933         char *p = NULL;
 934         Py_ssize_t bufsize = 0;
 935         char *digits, *digits_end;
 936         int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
 937         Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
 938         _Py_SET_53BIT_PRECISION_HEADER;
 939
 940         /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
 941            Must be matched by a call to _Py_dg_freedtoa. */
 942         _Py_SET_53BIT_PRECISION_START;
 943         digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
 944                              &digits_end);
 945         _Py_SET_53BIT_PRECISION_END;
 946
 947         decpt = (Py_ssize_t)decpt_as_int;
 948         if (digits == NULL) {
 949                 /* The only failure mode is no memory. */
 950                 PyErr_NoMemory();
 951                 goto exit;
 952         }
 953         assert(digits_end != NULL && digits_end >= digits);
 954         digits_len = digits_end - digits;
 955
 956         if (digits_len && !Py_ISDIGIT(digits[0])) {
 957                 /* Infinities and nans here; adapt Gay's output,
 958                    so convert Infinity to inf and NaN to nan, and
 959                    ignore sign of nan. Then return. */
 960
 961                 /* ignore the actual sign of a nan */
 962                 if (digits[0] == 'n' || digits[0] == 'N')
 963                         sign = 0;
 964
 965                 /* We only need 5 bytes to hold the result "+inf\0" . */
 966                 bufsize = 5; /* Used later in an assert. */
 967                 buf = (char *)PyMem_Malloc(bufsize);
 968                 if (buf == NULL) {
 969                         PyErr_NoMemory();
 970                         goto exit;
 971                 }
 972                 p = buf;
 973
 974                 if (sign == 1) {
 975                         *p++ = '-';
 976                 }
 977                 else if (always_add_sign) {
 978                         *p++ = '+';
 979                 }
 980                 if (digits[0] == 'i' || digits[0] == 'I') {
 981                         strncpy(p, float_strings[OFS_INF], 3);
 982                         p += 3;
 983
 984                         if (type)
 985                                 *type = Py_DTST_INFINITE;
 986                 }
 987                 else if (digits[0] == 'n' || digits[0] == 'N') {
 988                         strncpy(p, float_strings[OFS_NAN], 3);
 989                         p += 3;
 990
 991                         if (type)
 992                                 *type = Py_DTST_NAN;
 993                 }
 994                 else {
 995                         /* shouldn't get here: Gay's code should always return
 996                            something starting with a digit, an 'I',  or 'N' */
 997                         strncpy(p, "ERR", 3);
 998                         p += 3;
 999                         assert(0);
1000                 }
1001                 goto exit;
1002         }
1003
1004         /* The result must be finite (not inf or nan). */
1005         if (type)
1006                 *type = Py_DTST_FINITE;
1007
1008
1009         /* We got digits back, format them.  We may need to pad 'digits'
1010            either on the left or right (or both) with extra zeros, so in
1011            general the resulting string has the form
1012
1013              [<sign>]<zeros><digits><zeros>[<exponent>]
1014
1015            where either of the <zeros> pieces could be empty, and there's a
1016            decimal point that could appear either in <digits> or in the
1017            leading or trailing <zeros>.
1018
1019            Imagine an infinite 'virtual' string vdigits, consisting of the
1020            string 'digits' (starting at index 0) padded on both the left and
1021            right with infinite strings of zeros.  We want to output a slice
1022
1023              vdigits[vdigits_start : vdigits_end]
1024
1025            of this virtual string.  Thus if vdigits_start < 0 then we'll end
1026            up producing some leading zeros; if vdigits_end > digits_len there
1027            will be trailing zeros in the output.  The next section of code
1028            determines whether to use an exponent or not, figures out the
1029            position 'decpt' of the decimal point, and computes 'vdigits_start'
1030            and 'vdigits_end'. */
1031         vdigits_end = digits_len;
1032         switch (format_code) {
1033         case 'e':
1034                 use_exp = 1;
1035                 vdigits_end = precision;
1036                 break;
1037         case 'f':
1038                 vdigits_end = decpt + precision;
1039                 break;
1040         case 'g':
1041                 if (decpt <= -4 || decpt >
1042                     (add_dot_0_if_integer ? precision-1 : precision))
1043                         use_exp = 1;
1044                 if (use_alt_formatting)
1045                         vdigits_end = precision;
1046                 break;
1047         case 'r':
1048                 /* convert to exponential format at 1e16.  We used to convert
1049                    at 1e17, but that gives odd-looking results for some values
1050                    when a 16-digit 'shortest' repr is padded with bogus zeros.
1051                    For example, repr(2e16+8) would give 20000000000000010.0;
1052                    the true value is 20000000000000008.0. */
1053                 if (decpt <= -4 || decpt > 16)
1054                         use_exp = 1;
1055                 break;
1056         default:
1057                 PyErr_BadInternalCall();
1058                 goto exit;
1059         }
1060
1061         /* if using an exponent, reset decimal point position to 1 and adjust
1062            exponent accordingly.*/
1063         if (use_exp) {
1064                 exp = decpt - 1;
1065                 decpt = 1;
1066         }
1067         /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1068            decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1069         vdigits_start = decpt <= 0 ? decpt-1 : 0;
1070         if (!use_exp && add_dot_0_if_integer)
1071                 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1072         else
1073                 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1074
1075         /* double check inequalities */
1076         assert(vdigits_start <= 0 &&
1077                0 <= digits_len &&
1078                digits_len <= vdigits_end);
1079         /* decimal point should be in (vdigits_start, vdigits_end] */
1080         assert(vdigits_start < decpt && decpt <= vdigits_end);
1081
1082         /* Compute an upper bound how much memory we need. This might be a few
1083            chars too long, but no big deal. */
1084         bufsize =
1085                 /* sign, decimal point and trailing 0 byte */
1086                 3 +
1087
1088                 /* total digit count (including zero padding on both sides) */
1089                 (vdigits_end - vdigits_start) +
1090
1091                 /* exponent "e+100", max 3 numerical digits */
1092                 (use_exp ? 5 : 0);
1093
1094         /* Now allocate the memory and initialize p to point to the start of
1095            it. */
1096         buf = (char *)PyMem_Malloc(bufsize);
1097         if (buf == NULL) {
1098                 PyErr_NoMemory();
1099                 goto exit;
1100         }
1101         p = buf;
1102
1103         /* Add a negative sign if negative, and a plus sign if non-negative
1104            and always_add_sign is true. */
1105         if (sign == 1)
1106                 *p++ = '-';
1107         else if (always_add_sign)
1108                 *p++ = '+';
1109
1110         /* note that exactly one of the three 'if' conditions is true,
1111            so we include exactly one decimal point */
1112         /* Zero padding on left of digit string */
1113         if (decpt <= 0) {
1114                 memset(p, '0', decpt-vdigits_start);
1115                 p += decpt - vdigits_start;
1116                 *p++ = '.';
1117                 memset(p, '0', 0-decpt);
1118                 p += 0-decpt;
1119         }
1120         else {
1121                 memset(p, '0', 0-vdigits_start);
1122                 p += 0 - vdigits_start;
1123         }
1124
1125         /* Digits, with included decimal point */
1126         if (0 < decpt && decpt <= digits_len) {
1127                 strncpy(p, digits, decpt-0);
1128                 p += decpt-0;
1129                 *p++ = '.';
1130                 strncpy(p, digits+decpt, digits_len-decpt);
1131                 p += digits_len-decpt;
1132         }
1133         else {
1134                 strncpy(p, digits, digits_len);
1135                 p += digits_len;
1136         }
1137
1138         /* And zeros on the right */
1139         if (digits_len < decpt) {
1140                 memset(p, '0', decpt-digits_len);
1141                 p += decpt-digits_len;
1142                 *p++ = '.';
1143                 memset(p, '0', vdigits_end-decpt);
1144                 p += vdigits_end-decpt;
1145         }
1146         else {
1147                 memset(p, '0', vdigits_end-digits_len);
1148                 p += vdigits_end-digits_len;
1149         }
1150
1151         /* Delete a trailing decimal pt unless using alternative formatting. */
1152         if (p[-1] == '.' && !use_alt_formatting)
1153                 p--;
1154
1155         /* Now that we've done zero padding, add an exponent if needed. */
1156         if (use_exp) {
1157                 *p++ = float_strings[OFS_E][0];
1158                 exp_len = sprintf(p, "%+.02d", exp);
1159                 p += exp_len;
1160         }
1161   exit:
1162         if (buf) {
1163                 *p = '\0';
1164                 /* It's too late if this fails, as we've already stepped on
1165                    memory that isn't ours. But it's an okay debugging test. */
1166                 assert(p-buf < bufsize);
1167         }
1168         if (digits)
1169                 _Py_dg_freedtoa(digits);
1170
1171         return buf;
1172 }
1173
1174
1175 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1176                                          char format_code,
1177                                          int precision,
1178                                          int flags,
1179                                          int *type)
1180 {
1181         char **float_strings = lc_float_strings;
1182         int mode;
1183
1184         /* Validate format_code, and map upper and lower case. Compute the
1185            mode and make any adjustments as needed. */
1186         switch (format_code) {
1187         /* exponent */
1188         case 'E':
1189                 float_strings = uc_float_strings;
1190                 format_code = 'e';
1191                 /* Fall through. */
1192         case 'e':
1193                 mode = 2;
1194                 precision++;
1195                 break;
1196
1197         /* fixed */
1198         case 'F':
1199                 float_strings = uc_float_strings;
1200                 format_code = 'f';
1201                 /* Fall through. */
1202         case 'f':
1203                 mode = 3;
1204                 break;
1205
1206         /* general */
1207         case 'G':
1208                 float_strings = uc_float_strings;
1209                 format_code = 'g';
1210                 /* Fall through. */
1211         case 'g':
1212                 mode = 2;
1213                 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1214                 if (precision == 0)
1215                         precision = 1;
1216                 break;
1217
1218         /* repr format */
1219         case 'r':
1220                 mode = 0;
1221                 /* Supplied precision is unused, must be 0. */
1222                 if (precision != 0) {
1223                         PyErr_BadInternalCall();
1224                         return NULL;
1225                 }
1226                 break;
1227
1228         default:
1229                 PyErr_BadInternalCall();
1230                 return NULL;
1231         }
1232
1233         return format_float_short(val, format_code, mode, precision,
1234                                   flags & Py_DTSF_SIGN,
1235                                   flags & Py_DTSF_ADD_DOT_0,
1236                                   flags & Py_DTSF_ALT,
1237                                   float_strings, type);
1238 }
1239 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */