Python/pystrtod.c

   1 /* -*- Mode: C; c-file-style: "python" -*- */
   2
   3 #include <Python.h>
   4 #include <locale.h>
   5
   6 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
   7    "infinity", with an optional leading sign of "+" or "-".  On success,
   8    return the NaN or Infinity as a double and set *endptr to point just beyond
   9    the successfully parsed portion of the string.  On failure, return -1.0 and
  10    set *endptr to point to the start of the string. */
  11
  12 static int
  13 case_insensitive_match(const char *s, const char *t)
  14 {
  15         while(*t && Py_TOLOWER(*s) == *t) {
  16                 s++;
  17                 t++;
  18         }
  19         return *t ? 0 : 1;
  20 }
  21
  22 double
  23 _Py_parse_inf_or_nan(const char *p, char **endptr)
  24 {
  25         double retval;
  26         const char *s;
  27         int negate = 0;
  28
  29         s = p;
  30         if (*s == '-') {
  31                 negate = 1;
  32                 s++;
  33         }
  34         else if (*s == '+') {
  35                 s++;
  36         }
  37         if (case_insensitive_match(s, "inf")) {
  38                 s += 3;
  39                 if (case_insensitive_match(s, "inity"))
  40                         s += 5;
  41                 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
  42         }
  43 #ifdef Py_NAN
  44         else if (case_insensitive_match(s, "nan")) {
  45                 s += 3;
  46                 retval = negate ? -Py_NAN : Py_NAN;
  47         }
  48 #endif
  49         else {
  50                 s = p;
  51                 retval = -1.0;
  52         }
  53         *endptr = (char *)s;
  54         return retval;
  55 }
  56
  57 /**
  58  * PyOS_ascii_strtod:
  59  * @nptr:    the string to convert to a numeric value.
  60  * @endptr:  if non-%NULL, it returns the character after
  61  *           the last character used in the conversion.
  62  *
 * Converts a string to a double value.
  64  * This function behaves like the standard strtod() function
  65  * does in the C locale. It does this without actually
  66  * changing the current locale, since that would not be
  67  * thread-safe.
  68  *
  69  * This function is typically used when reading configuration
  70  * files or other non-user input that should be locale independent.
  71  * To handle input from the user you should normally use the
  72  * locale-sensitive system strtod() function.
  73  *
  74  * If the correct value would cause overflow, plus or minus %HUGE_VAL
  75  * is returned (according to the sign of the value), and %ERANGE is
  76  * stored in %errno. If the correct value would cause underflow,
  77  * zero is returned and %ERANGE is stored in %errno.
  78  * If memory allocation fails, %ENOMEM is stored in %errno.
  79  *
  80  * This function resets %errno before calling strtod() so that
  81  * you can reliably detect overflow and underflow.
  82  *
 * Return value: the double value.
  84  **/
  85
  86 #ifndef PY_NO_SHORT_FLOAT_REPR
  87
  88 double
  89 _PyOS_ascii_strtod(const char *nptr, char **endptr)
  90 {
  91         double result;
  92         _Py_SET_53BIT_PRECISION_HEADER;
  93
  94         assert(nptr != NULL);
  95         /* Set errno to zero, so that we can distinguish zero results
  96            and underflows */
  97         errno = 0;
  98
  99         _Py_SET_53BIT_PRECISION_START;
 100         result = _Py_dg_strtod(nptr, endptr);
 101         _Py_SET_53BIT_PRECISION_END;
 102
 103         if (*endptr == nptr)
 104                 /* string might represent and inf or nan */
 105                 result = _Py_parse_inf_or_nan(nptr, endptr);
 106
 107         return result;
 108
 109 }
 110
 111 #else
 112
 113 /*
 114    Use system strtod;  since strtod is locale aware, we may
 115    have to first fix the decimal separator.
 116
 117    Note that unlike _Py_dg_strtod, the system strtod may not always give
 118    correctly rounded results.
 119 */
 120
 121 double
 122 _PyOS_ascii_strtod(const char *nptr, char **endptr)
 123 {
 124         char *fail_pos;
 125         double val = -1.0;
 126         struct lconv *locale_data;
 127         const char *decimal_point;
 128         size_t decimal_point_len;
 129         const char *p, *decimal_point_pos;
 130         const char *end = NULL; /* Silence gcc */
 131         const char *digits_pos = NULL;
 132         int negate = 0;
 133
 134         assert(nptr != NULL);
 135
 136         fail_pos = NULL;
 137
 138         locale_data = localeconv();
 139         decimal_point = locale_data->decimal_point;
 140         decimal_point_len = strlen(decimal_point);
 141
 142         assert(decimal_point_len != 0);
 143
 144         decimal_point_pos = NULL;
 145
 146         /* Parse infinities and nans */
 147         val = _Py_parse_inf_or_nan(nptr, endptr);
 148         if (*endptr != nptr)
 149                 return val;
 150
 151         /* Set errno to zero, so that we can distinguish zero results
 152            and underflows */
 153         errno = 0;
 154
 155         /* We process the optional sign manually, then pass the remainder to
 156            the system strtod.  This ensures that the result of an underflow
 157            has the correct sign. (bug #1725)  */
 158         p = nptr;
 159         /* Process leading sign, if present */
 160         if (*p == '-') {
 161                 negate = 1;
 162                 p++;
 163         }
 164         else if (*p == '+') {
 165                 p++;
 166         }
 167
 168         /* Some platform strtods accept hex floats; Python shouldn't (at the
 169            moment), so we check explicitly for strings starting with '0x'. */
 170         if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
 171                 goto invalid_string;
 172
 173         /* Check that what's left begins with a digit or decimal point */
 174         if (!Py_ISDIGIT(*p) && *p != '.')
 175                 goto invalid_string;
 176
 177         digits_pos = p;
 178         if (decimal_point[0] != '.' ||
 179             decimal_point[1] != 0)
 180         {
 181                 /* Look for a '.' in the input; if present, it'll need to be
 182                    swapped for the current locale's decimal point before we
 183                    call strtod.  On the other hand, if we find the current
 184                    locale's decimal point then the input is invalid. */
 185                 while (Py_ISDIGIT(*p))
 186                         p++;
 187
 188                 if (*p == '.')
 189                 {
 190                         decimal_point_pos = p++;
 191
 192                         /* locate end of number */
 193                         while (Py_ISDIGIT(*p))
 194                                 p++;
 195
 196                         if (*p == 'e' || *p == 'E')
 197                                 p++;
 198                         if (*p == '+' || *p == '-')
 199                                 p++;
 200                         while (Py_ISDIGIT(*p))
 201                                 p++;
 202                         end = p;
 203                 }
 204                 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
 205                         /* Python bug #1417699 */
 206                         goto invalid_string;
 207                 /* For the other cases, we need not convert the decimal
 208                    point */
 209         }
 210
 211         if (decimal_point_pos) {
 212                 char *copy, *c;
 213                 /* Create a copy of the input, with the '.' converted to the
 214                    locale-specific decimal point */
 215                 copy = (char *)PyMem_MALLOC(end - digits_pos +
 216                                             1 + decimal_point_len);
 217                 if (copy == NULL) {
 218                         *endptr = (char *)nptr;
 219                         errno = ENOMEM;
 220                         return val;
 221                 }
 222
 223                 c = copy;
 224                 memcpy(c, digits_pos, decimal_point_pos - digits_pos);
 225                 c += decimal_point_pos - digits_pos;
 226                 memcpy(c, decimal_point, decimal_point_len);
 227                 c += decimal_point_len;
 228                 memcpy(c, decimal_point_pos + 1,
 229                        end - (decimal_point_pos + 1));
 230                 c += end - (decimal_point_pos + 1);
 231                 *c = 0;
 232
 233                 val = strtod(copy, &fail_pos);
 234
 235                 if (fail_pos)
 236                 {
 237                         if (fail_pos > decimal_point_pos)
 238                                 fail_pos = (char *)digits_pos +
 239                                         (fail_pos - copy) -
 240                                         (decimal_point_len - 1);
 241                         else
 242                                 fail_pos = (char *)digits_pos +
 243                                         (fail_pos - copy);
 244                 }
 245
 246                 PyMem_FREE(copy);
 247
 248         }
 249         else {
 250                 val = strtod(digits_pos, &fail_pos);
 251         }
 252
 253         if (fail_pos == digits_pos)
 254                 goto invalid_string;
 255
 256         if (negate && fail_pos != nptr)
 257                 val = -val;
 258         *endptr = fail_pos;
 259
 260         return val;
 261
 262   invalid_string:
 263         *endptr = (char*)nptr;
 264         errno = EINVAL;
 265         return -1.0;
 266 }
 267
 268 #endif
 269
 270 /* PyOS_ascii_strtod is DEPRECATED in Python 3.1 */
 271
 272 double
 273 PyOS_ascii_strtod(const char *nptr, char **endptr)
 274 {
 275         char *fail_pos;
 276         const char *p;
 277         double x;
 278
 279         if (PyErr_WarnEx(PyExc_DeprecationWarning,
 280                          "PyOS_ascii_strtod and PyOS_ascii_atof are "
 281                          "deprecated.  Use PyOS_string_to_double "
 282                          "instead.", 1) < 0)
 283                 return -1.0;
 284
 285         /* _PyOS_ascii_strtod already does everything that we want,
 286            except that it doesn't parse leading whitespace */
 287         p = nptr;
 288         while (Py_ISSPACE(*p))
 289                 p++;
 290         x = _PyOS_ascii_strtod(p, &fail_pos);
 291         if (fail_pos == p)
 292                 fail_pos = (char *)nptr;
 293         if (endptr)
 294                 *endptr = (char *)fail_pos;
 295         return x;
 296 }
 297
/* PyOS_ascii_atof is DEPRECATED in Python 3.1 */

/* Convert the string nptr to a double by calling PyOS_ascii_strtod with a
   NULL endptr, so the position where parsing stopped is not reported.  The
   return value is whatever PyOS_ascii_strtod returns. */
double
PyOS_ascii_atof(const char *nptr)
{
        return PyOS_ascii_strtod(nptr, NULL);
}
 305
 306 /* PyOS_string_to_double is the recommended replacement for the deprecated
 307    PyOS_ascii_strtod and PyOS_ascii_atof functions.  It converts a
 308    null-terminated byte string s (interpreted as a string of ASCII characters)
 309    to a float.  The string should not have leading or trailing whitespace (in
 310    contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
 311    whitespace).  The conversion is independent of the current locale.
 312
 313    If endptr is NULL, try to convert the whole string.  Raise ValueError and
 314    return -1.0 if the string is not a valid representation of a floating-point
 315    number.
 316
 317    If endptr is non-NULL, try to convert as much of the string as possible.
 318    If no initial segment of the string is the valid representation of a
 319    floating-point number then *endptr is set to point to the beginning of the
 320    string, -1.0 is returned and again ValueError is raised.
 321
 322    On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
 323    if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
   exception is raised.  Otherwise, overflow_exception should point to
   a Python exception; this exception will be raised, -1.0 will be returned,
 326    and *endptr will point just past the end of the converted value.
 327
 328    If any other failure occurs (for example lack of memory), -1.0 is returned
 329    and the appropriate Python exception will have been set.
 330 */
 331
 332 double
 333 PyOS_string_to_double(const char *s,
 334                       char **endptr,
 335                       PyObject *overflow_exception)
 336 {
 337         double x, result=-1.0;
 338         char *fail_pos;
 339
 340         errno = 0;
 341         PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
 342         x = _PyOS_ascii_strtod(s, &fail_pos);
 343         PyFPE_END_PROTECT(x)
 344
 345         if (errno == ENOMEM) {
 346                 PyErr_NoMemory();
 347                 fail_pos = (char *)s;
 348         }
 349         else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
 350                 PyErr_Format(PyExc_ValueError,
 351                               "could not convert string to float: "
 352                               "%.200s", s);
 353         else if (fail_pos == s)
 354                 PyErr_Format(PyExc_ValueError,
 355                               "could not convert string to float: "
 356                               "%.200s", s);
 357         else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
 358                 PyErr_Format(overflow_exception,
 359                               "value too large to convert to float: "
 360                               "%.200s", s);
 361         else
 362                 result = x;
 363
 364         if (endptr != NULL)
 365                 *endptr = fail_pos;
 366         return result;
 367 }
 368
 369 /* Given a string that may have a decimal point in the current
 370    locale, change it back to a dot.  Since the string cannot get
 371    longer, no need for a maximum buffer size parameter. */
 372 Py_LOCAL_INLINE(void)
 373 change_decimal_from_locale_to_dot(char* buffer)
 374 {
 375         struct lconv *locale_data = localeconv();
 376         const char *decimal_point = locale_data->decimal_point;
 377
 378         if (decimal_point[0] != '.' || decimal_point[1] != 0) {
 379                 size_t decimal_point_len = strlen(decimal_point);
 380
 381                 if (*buffer == '+' || *buffer == '-')
 382                         buffer++;
 383                 while (Py_ISDIGIT(*buffer))
 384                         buffer++;
 385                 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
 386                         *buffer = '.';
 387                         buffer++;
 388                         if (decimal_point_len > 1) {
 389                                 /* buffer needs to get smaller */
 390                                 size_t rest_len = strlen(buffer +
 391                                                      (decimal_point_len - 1));
 392                                 memmove(buffer,
 393                                         buffer + (decimal_point_len - 1),
 394                                         rest_len);
 395                                 buffer[rest_len] = 0;
 396                         }
 397                 }
 398         }
 399 }
 400
 401
 402 /* From the C99 standard, section 7.19.6:
 403 The exponent always contains at least two digits, and only as many more digits
 404 as necessary to represent the exponent.
 405 */
 406 #define MIN_EXPONENT_DIGITS 2
 407
 408 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
 409    in length. */
 410 Py_LOCAL_INLINE(void)
 411 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
 412 {
 413         char *p = strpbrk(buffer, "eE");
 414         if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
 415                 char *start = p + 2;
 416                 int exponent_digit_cnt = 0;
 417                 int leading_zero_cnt = 0;
 418                 int in_leading_zeros = 1;
 419                 int significant_digit_cnt;
 420
 421                 /* Skip over the exponent and the sign. */
 422                 p += 2;
 423
 424                 /* Find the end of the exponent, keeping track of leading
 425                    zeros. */
 426                 while (*p && Py_ISDIGIT(*p)) {
 427                         if (in_leading_zeros && *p == '0')
 428                                 ++leading_zero_cnt;
 429                         if (*p != '0')
 430                                 in_leading_zeros = 0;
 431                         ++p;
 432                         ++exponent_digit_cnt;
 433                 }
 434
 435                 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
 436                 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
 437                         /* If there are 2 exactly digits, we're done,
 438                            regardless of what they contain */
 439                 }
 440                 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
 441                         int extra_zeros_cnt;
 442
 443                         /* There are more than 2 digits in the exponent.  See
 444                            if we can delete some of the leading zeros */
 445                         if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
 446                                 significant_digit_cnt = MIN_EXPONENT_DIGITS;
 447                         extra_zeros_cnt = exponent_digit_cnt -
 448                                 significant_digit_cnt;
 449
 450                         /* Delete extra_zeros_cnt worth of characters from the
 451                            front of the exponent */
 452                         assert(extra_zeros_cnt >= 0);
 453
 454                         /* Add one to significant_digit_cnt to copy the
 455                            trailing 0 byte, thus setting the length */
 456                         memmove(start,
 457                                 start + extra_zeros_cnt,
 458                                 significant_digit_cnt + 1);
 459                 }
 460                 else {
 461                         /* If there are fewer than 2 digits, add zeros
 462                            until there are 2, if there's enough room */
 463                         int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
 464                         if (start + zeros + exponent_digit_cnt + 1
 465                               < buffer + buf_size) {
 466                                 memmove(start + zeros, start,
 467                                         exponent_digit_cnt + 1);
 468                                 memset(start, '0', zeros);
 469                         }
 470                 }
 471         }
 472 }
 473
 474 /* Remove trailing zeros after the decimal point from a numeric string; also
 475    remove the decimal point if all digits following it are zero.  The numeric
 476    string must end in '\0', and should not have any leading or trailing
 477    whitespace.  Assumes that the decimal point is '.'. */
 478 Py_LOCAL_INLINE(void)
 479 remove_trailing_zeros(char *buffer)
 480 {
 481         char *old_fraction_end, *new_fraction_end, *end, *p;
 482
 483         p = buffer;
 484         if (*p == '-' || *p == '+')
 485                 /* Skip leading sign, if present */
 486                 ++p;
 487         while (Py_ISDIGIT(*p))
 488                 ++p;
 489
 490         /* if there's no decimal point there's nothing to do */
 491         if (*p++ != '.')
 492                 return;
 493
 494         /* scan any digits after the point */
 495         while (Py_ISDIGIT(*p))
 496                 ++p;
 497         old_fraction_end = p;
 498
 499         /* scan up to ending '\0' */
 500         while (*p != '\0')
 501                 p++;
 502         /* +1 to make sure that we move the null byte as well */
 503         end = p+1;
 504
 505         /* scan back from fraction_end, looking for removable zeros */
 506         p = old_fraction_end;
 507         while (*(p-1) == '0')
 508                 --p;
 509         /* and remove point if we've got that far */
 510         if (*(p-1) == '.')
 511                 --p;
 512         new_fraction_end = p;
 513
 514         memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
 515 }
 516
 517 /* Ensure that buffer has a decimal point in it.  The decimal point will not
 518    be in the current locale, it will always be '.'. Don't add a decimal point
 519    if an exponent is present.  Also, convert to exponential notation where
 520    adding a '.0' would produce too many significant digits (see issue 5864).
 521
 522    Returns a pointer to the fixed buffer, or NULL on failure.
 523 */
 524 Py_LOCAL_INLINE(char *)
 525 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
 526 {
 527         int digit_count, insert_count = 0, convert_to_exp = 0;
 528         char *chars_to_insert, *digits_start;
 529
 530         /* search for the first non-digit character */
 531         char *p = buffer;
 532         if (*p == '-' || *p == '+')
 533                 /* Skip leading sign, if present.  I think this could only
 534                    ever be '-', but it can't hurt to check for both. */
 535                 ++p;
 536         digits_start = p;
 537         while (*p && Py_ISDIGIT(*p))
 538                 ++p;
 539         digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
 540
 541         if (*p == '.') {
 542                 if (Py_ISDIGIT(*(p+1))) {
 543                         /* Nothing to do, we already have a decimal
 544                            point and a digit after it */
 545                 }
 546                 else {
 547                         /* We have a decimal point, but no following
 548                            digit.  Insert a zero after the decimal. */
 549                         /* can't ever get here via PyOS_double_to_string */
 550                         assert(precision == -1);
 551                         ++p;
 552                         chars_to_insert = "0";
 553                         insert_count = 1;
 554                 }
 555         }
 556         else if (!(*p == 'e' || *p == 'E')) {
 557                 /* Don't add ".0" if we have an exponent. */
 558                 if (digit_count == precision) {
 559                         /* issue 5864: don't add a trailing .0 in the case
 560                            where the '%g'-formatted result already has as many
 561                            significant digits as were requested.  Switch to
 562                            exponential notation instead. */
 563                         convert_to_exp = 1;
 564                         /* no exponent, no point, and we shouldn't land here
 565                            for infs and nans, so we must be at the end of the
 566                            string. */
 567                         assert(*p == '\0');
 568                 }
 569                 else {
 570                         assert(precision == -1 || digit_count < precision);
 571                         chars_to_insert = ".0";
 572                         insert_count = 2;
 573                 }
 574         }
 575         if (insert_count) {
 576                 size_t buf_len = strlen(buffer);
 577                 if (buf_len + insert_count + 1 >= buf_size) {
 578                         /* If there is not enough room in the buffer
 579                            for the additional text, just skip it.  It's
 580                            not worth generating an error over. */
 581                 }
 582                 else {
 583                         memmove(p + insert_count, p,
 584                                 buffer + strlen(buffer) - p + 1);
 585                         memcpy(p, chars_to_insert, insert_count);
 586                 }
 587         }
 588         if (convert_to_exp) {
 589                 int written;
 590                 size_t buf_avail;
 591                 p = digits_start;
 592                 /* insert decimal point */
 593                 assert(digit_count >= 1);
 594                 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
 595                 p[1] = '.';
 596                 p += digit_count+1;
 597                 assert(p <= buf_size+buffer);
 598                 buf_avail = buf_size+buffer-p;
 599                 if (buf_avail == 0)
 600                         return NULL;
 601                 /* Add exponent.  It's okay to use lower case 'e': we only
 602                    arrive here as a result of using the empty format code or
 603                    repr/str builtins and those never want an upper case 'E' */
 604                 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
 605                 if (!(0 <= written &&
 606                       written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
 607                         /* output truncated, or something else bad happened */
 608                         return NULL;
 609                 remove_trailing_zeros(buffer);
 610         }
 611         return buffer;
 612 }
 613
 614 /* see FORMATBUFLEN in unicodeobject.c */
 615 #define FLOAT_FORMATBUFLEN 120
 616
 617 /**
 618  * PyOS_ascii_formatd:
 619  * @buffer: A buffer to place the resulting string in
 620  * @buf_size: The length of the buffer.
 621  * @format: The printf()-style format to use for the
 622  *          code to use for converting.
 623  * @d: The #gdouble to convert
 624  *
 625  * Converts a #gdouble to a string, using the '.' as
 626  * decimal point. To format the number you pass in
 627  * a printf()-style format string. Allowed conversion
 628  * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 629  *
 630  * 'Z' is the same as 'g', except it always has a decimal and
 631  *     at least one digit after the decimal.
 632  *
 633  * Return value: The pointer to the buffer with the converted string.
 634  * On failure returns NULL but does not set any Python exception.
 635  **/
 636 char *
 637 _PyOS_ascii_formatd(char       *buffer,
 638                    size_t      buf_size,
 639                    const char *format,
 640                    double      d,
 641                    int         precision)
 642 {
 643         char format_char;
 644         size_t format_len = strlen(format);
 645
 646         /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
 647            also with at least one character past the decimal. */
 648         char tmp_format[FLOAT_FORMATBUFLEN];
 649
 650         /* The last character in the format string must be the format char */
 651         format_char = format[format_len - 1];
 652
 653         if (format[0] != '%')
 654                 return NULL;
 655
 656         /* I'm not sure why this test is here.  It's ensuring that the format
 657            string after the first character doesn't have a single quote, a
 658            lowercase l, or a percent. This is the reverse of the commented-out
 659            test about 10 lines ago. */
 660         if (strpbrk(format + 1, "'l%"))
 661                 return NULL;
 662
 663         /* Also curious about this function is that it accepts format strings
 664            like "%xg", which are invalid for floats.  In general, the
 665            interface to this function is not very good, but changing it is
 666            difficult because it's a public API. */
 667
 668         if (!(format_char == 'e' || format_char == 'E' ||
 669               format_char == 'f' || format_char == 'F' ||
 670               format_char == 'g' || format_char == 'G' ||
 671               format_char == 'Z'))
 672                 return NULL;
 673
 674         /* Map 'Z' format_char to 'g', by copying the format string and
 675            replacing the final char with a 'g' */
 676         if (format_char == 'Z') {
 677                 if (format_len + 1 >= sizeof(tmp_format)) {
 678                         /* The format won't fit in our copy.  Error out.  In
 679                            practice, this will never happen and will be
 680                            detected by returning NULL */
 681                         return NULL;
 682                 }
 683                 strcpy(tmp_format, format);
 684                 tmp_format[format_len - 1] = 'g';
 685                 format = tmp_format;
 686         }
 687
 688
 689         /* Have PyOS_snprintf do the hard work */
 690         PyOS_snprintf(buffer, buf_size, format, d);
 691
 692         /* Do various fixups on the return string */
 693
 694         /* Get the current locale, and find the decimal point string.
 695            Convert that string back to a dot. */
 696         change_decimal_from_locale_to_dot(buffer);
 697
 698         /* If an exponent exists, ensure that the exponent is at least
 699            MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
 700            for the extra zeros.  Also, if there are more than
 701            MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
 702            back to MIN_EXPONENT_DIGITS */
 703         ensure_minimum_exponent_length(buffer, buf_size);
 704
 705         /* If format_char is 'Z', make sure we have at least one character
 706            after the decimal point (and make sure we have a decimal point);
 707            also switch to exponential notation in some edge cases where the
 708            extra character would produce more significant digits that we
 709            really want. */
 710         if (format_char == 'Z')
 711                 buffer = ensure_decimal_point(buffer, buf_size, precision);
 712
 713         return buffer;
 714 }
 715
 716 char *
 717 PyOS_ascii_formatd(char       *buffer,
 718                    size_t      buf_size,
 719                    const char *format,
 720                    double      d)
 721 {
 722         if (PyErr_WarnEx(PyExc_DeprecationWarning,
 723                          "PyOS_ascii_formatd is deprecated, "
 724                          "use PyOS_double_to_string instead", 1) < 0)
 725                 return NULL;
 726
 727         return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
 728 }
 729
 730 #ifdef PY_NO_SHORT_FLOAT_REPR
 731
 732 /* The fallback code to use if _Py_dg_dtoa is not available. */
 733
 734 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
 735                                          char format_code,
 736                                          int precision,
 737                                          int flags,
 738                                          int *type)
 739 {
 740         char format[32];
 741         Py_ssize_t bufsize;
 742         char *buf;
 743         int t, exp;
 744         int upper = 0;
 745
 746         /* Validate format_code, and map upper and lower case */
 747         switch (format_code) {
 748         case 'e':          /* exponent */
 749         case 'f':          /* fixed */
 750         case 'g':          /* general */
 751                 break;
 752         case 'E':
 753                 upper = 1;
 754                 format_code = 'e';
 755                 break;
 756         case 'F':
 757                 upper = 1;
 758                 format_code = 'f';
 759                 break;
 760         case 'G':
 761                 upper = 1;
 762                 format_code = 'g';
 763                 break;
 764         case 'r':          /* repr format */
 765                 /* Supplied precision is unused, must be 0. */
 766                 if (precision != 0) {
 767                         PyErr_BadInternalCall();
 768                         return NULL;
 769                 }
 770                 /* The repr() precision (17 significant decimal digits) is the
 771                    minimal number that is guaranteed to have enough precision
 772                    so that if the number is read back in the exact same binary
 773                    value is recreated.  This is true for IEEE floating point
 774                    by design, and also happens to work for all other modern
 775                    hardware. */
 776                 precision = 17;
 777                 format_code = 'g';
 778                 break;
 779         default:
 780                 PyErr_BadInternalCall();
 781                 return NULL;
 782         }
 783
 784         /* Here's a quick-and-dirty calculation to figure out how big a buffer
 785            we need.  In general, for a finite float we need:
 786
 787              1 byte for each digit of the decimal significand, and
 788
 789              1 for a possible sign
 790              1 for a possible decimal point
 791              2 for a possible [eE][+-]
 792              1 for each digit of the exponent;  if we allow 19 digits
 793                total then we're safe up to exponents of 2**63.
 794              1 for the trailing nul byte
 795
 796            This gives a total of 24 + the number of digits in the significand,
 797            and the number of digits in the significand is:
 798
 799              for 'g' format: at most precision, except possibly
 800                when precision == 0, when it's 1.
 801              for 'e' format: precision+1
 802              for 'f' format: precision digits after the point, at least 1
 803                before.  To figure out how many digits appear before the point
 804                we have to examine the size of the number.  If fabs(val) < 1.0
 805                then there will be only one digit before the point.  If
 806                fabs(val) >= 1.0, then there are at most
 807
 808                  1+floor(log10(ceiling(fabs(val))))
 809
 810                digits before the point (where the 'ceiling' allows for the
 811                possibility that the rounding rounds the integer part of val
 812                up).  A safe upper bound for the above quantity is
 813                1+floor(exp/3), where exp is the unique integer such that 0.5
 814                <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
 815                frexp.
 816
 817            So we allow room for precision+1 digits for all formats, plus an
 818            extra floor(exp/3) digits for 'f' format.
 819
 820         */
 821
 822         if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
 823                 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
 824                 bufsize = 5;
 825         else {
 826                 bufsize = 25 + precision;
 827                 if (format_code == 'f' && fabs(val) >= 1.0) {
 828                         frexp(val, &exp);
 829                         bufsize += exp/3;
 830                 }
 831         }
 832
 833         buf = PyMem_Malloc(bufsize);
 834         if (buf == NULL) {
 835                 PyErr_NoMemory();
 836                 return NULL;
 837         }
 838
 839         /* Handle nan and inf. */
 840         if (Py_IS_NAN(val)) {
 841                 strcpy(buf, "nan");
 842                 t = Py_DTST_NAN;
 843         } else if (Py_IS_INFINITY(val)) {
 844                 if (copysign(1., val) == 1.)
 845                         strcpy(buf, "inf");
 846                 else
 847                         strcpy(buf, "-inf");
 848                 t = Py_DTST_INFINITE;
 849         } else {
 850                 t = Py_DTST_FINITE;
 851                 if (flags & Py_DTSF_ADD_DOT_0)
 852                         format_code = 'Z';
 853
 854                 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
 855                               (flags & Py_DTSF_ALT ? "#" : ""), precision,
 856                               format_code);
 857                 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
 858         }
 859
 860         /* Add sign when requested.  It's convenient (esp. when formatting
 861          complex numbers) to include a sign even for inf and nan. */
 862         if (flags & Py_DTSF_SIGN && buf[0] != '-') {
 863                 size_t len = strlen(buf);
 864                 /* the bufsize calculations above should ensure that we've got
 865                    space to add a sign */
 866                 assert((size_t)bufsize >= len+2);
 867                 memmove(buf+1, buf, len+1);
 868                 buf[0] = '+';
 869         }
 870         if (upper) {
 871                 /* Convert to upper case. */
 872                 char *p1;
 873                 for (p1 = buf; *p1; p1++)
 874                         *p1 = Py_TOUPPER(*p1);
 875         }
 876
 877         if (type)
 878                 *type = t;
 879         return buf;
 880 }
 881
 882 #else
 883
 884 /* _Py_dg_dtoa is available. */
 885
/* Lower- and upper-case spellings of the fixed text fragments emitted by
   format_float_short.  I'm using lookup tables here so that I don't have to
   invent a non-locale specific way to convert to uppercase.  The OFS_*
   constants are the indices into either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them:
   format_float_short copies exactly 3 bytes from the OFS_INF and OFS_NAN
   entries and only the first byte of the OFS_E entry. */
static char *lc_float_strings[] = {
        "inf",
        "nan",
        "e",
};
static char *uc_float_strings[] = {
        "INF",
        "NAN",
        "E",
};
 903
 904
 905 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
 906    memory contain the resulting string.
 907
 908    Arguments:
 909      d is the double to be converted
 910      format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
 911        correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
 912      mode is one of '0', '2' or '3', and is completely determined by
 913        format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
 914      precision is the desired precision
 915      always_add_sign is nonzero if a '+' sign should be included for positive
 916        numbers
 917      add_dot_0_if_integer is nonzero if integers in non-exponential form
 918        should have ".0" added.  Only applies to format codes 'r' and 'g'.
 919      use_alt_formatting is nonzero if alternative formatting should be
 920        used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
 921        at most one of use_alt_formatting and add_dot_0_if_integer should
 922        be nonzero.
 923      type, if non-NULL, will be set to one of these constants to identify
 924        the type of the 'd' argument:
 925          Py_DTST_FINITE
 926          Py_DTST_INFINITE
 927          Py_DTST_NAN
 928
 929    Returns a PyMem_Malloc'd block of memory containing the resulting string,
 930     or NULL on error. If NULL is returned, the Python error has been set.
 931  */
 932
 933 static char *
 934 format_float_short(double d, char format_code,
 935                    int mode, Py_ssize_t precision,
 936                    int always_add_sign, int add_dot_0_if_integer,
 937                    int use_alt_formatting, char **float_strings, int *type)
 938 {
 939         char *buf = NULL;
 940         char *p = NULL;
 941         Py_ssize_t bufsize = 0;
 942         char *digits, *digits_end;
 943         int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
 944         Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
 945         _Py_SET_53BIT_PRECISION_HEADER;
 946
 947         /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
 948            Must be matched by a call to _Py_dg_freedtoa. */
 949         _Py_SET_53BIT_PRECISION_START;
 950         digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
 951                              &digits_end);
 952         _Py_SET_53BIT_PRECISION_END;
 953
 954         decpt = (Py_ssize_t)decpt_as_int;
 955         if (digits == NULL) {
 956                 /* The only failure mode is no memory. */
 957                 PyErr_NoMemory();
 958                 goto exit;
 959         }
 960         assert(digits_end != NULL && digits_end >= digits);
 961         digits_len = digits_end - digits;
 962
 963         if (digits_len && !Py_ISDIGIT(digits[0])) {
 964                 /* Infinities and nans here; adapt Gay's output,
 965                    so convert Infinity to inf and NaN to nan, and
 966                    ignore sign of nan. Then return. */
 967
 968                 /* ignore the actual sign of a nan */
 969                 if (digits[0] == 'n' || digits[0] == 'N')
 970                         sign = 0;
 971
 972                 /* We only need 5 bytes to hold the result "+inf\0" . */
 973                 bufsize = 5; /* Used later in an assert. */
 974                 buf = (char *)PyMem_Malloc(bufsize);
 975                 if (buf == NULL) {
 976                         PyErr_NoMemory();
 977                         goto exit;
 978                 }
 979                 p = buf;
 980
 981                 if (sign == 1) {
 982                         *p++ = '-';
 983                 }
 984                 else if (always_add_sign) {
 985                         *p++ = '+';
 986                 }
 987                 if (digits[0] == 'i' || digits[0] == 'I') {
 988                         strncpy(p, float_strings[OFS_INF], 3);
 989                         p += 3;
 990
 991                         if (type)
 992                                 *type = Py_DTST_INFINITE;
 993                 }
 994                 else if (digits[0] == 'n' || digits[0] == 'N') {
 995                         strncpy(p, float_strings[OFS_NAN], 3);
 996                         p += 3;
 997
 998                         if (type)
 999                                 *type = Py_DTST_NAN;
1000                 }
1001                 else {
1002                         /* shouldn't get here: Gay's code should always return
1003                            something starting with a digit, an 'I',  or 'N' */
1004                         strncpy(p, "ERR", 3);
1005                         p += 3;
1006                         assert(0);
1007                 }
1008                 goto exit;
1009         }
1010
1011         /* The result must be finite (not inf or nan). */
1012         if (type)
1013                 *type = Py_DTST_FINITE;
1014
1015
1016         /* We got digits back, format them.  We may need to pad 'digits'
1017            either on the left or right (or both) with extra zeros, so in
1018            general the resulting string has the form
1019
1020              [<sign>]<zeros><digits><zeros>[<exponent>]
1021
1022            where either of the <zeros> pieces could be empty, and there's a
1023            decimal point that could appear either in <digits> or in the
1024            leading or trailing <zeros>.
1025
1026            Imagine an infinite 'virtual' string vdigits, consisting of the
1027            string 'digits' (starting at index 0) padded on both the left and
1028            right with infinite strings of zeros.  We want to output a slice
1029
1030              vdigits[vdigits_start : vdigits_end]
1031
1032            of this virtual string.  Thus if vdigits_start < 0 then we'll end
1033            up producing some leading zeros; if vdigits_end > digits_len there
1034            will be trailing zeros in the output.  The next section of code
1035            determines whether to use an exponent or not, figures out the
1036            position 'decpt' of the decimal point, and computes 'vdigits_start'
1037            and 'vdigits_end'. */
1038         vdigits_end = digits_len;
1039         switch (format_code) {
1040         case 'e':
1041                 use_exp = 1;
1042                 vdigits_end = precision;
1043                 break;
1044         case 'f':
1045                 vdigits_end = decpt + precision;
1046                 break;
1047         case 'g':
1048                 if (decpt <= -4 || decpt >
1049                     (add_dot_0_if_integer ? precision-1 : precision))
1050                         use_exp = 1;
1051                 if (use_alt_formatting)
1052                         vdigits_end = precision;
1053                 break;
1054         case 'r':
1055                 /* convert to exponential format at 1e16.  We used to convert
1056                    at 1e17, but that gives odd-looking results for some values
1057                    when a 16-digit 'shortest' repr is padded with bogus zeros.
1058                    For example, repr(2e16+8) would give 20000000000000010.0;
1059                    the true value is 20000000000000008.0. */
1060                 if (decpt <= -4 || decpt > 16)
1061                         use_exp = 1;
1062                 break;
1063         default:
1064                 PyErr_BadInternalCall();
1065                 goto exit;
1066         }
1067
1068         /* if using an exponent, reset decimal point position to 1 and adjust
1069            exponent accordingly.*/
1070         if (use_exp) {
1071                 exp = decpt - 1;
1072                 decpt = 1;
1073         }
1074         /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1075            decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1076         vdigits_start = decpt <= 0 ? decpt-1 : 0;
1077         if (!use_exp && add_dot_0_if_integer)
1078                 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1079         else
1080                 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1081
1082         /* double check inequalities */
1083         assert(vdigits_start <= 0 &&
1084                0 <= digits_len &&
1085                digits_len <= vdigits_end);
1086         /* decimal point should be in (vdigits_start, vdigits_end] */
1087         assert(vdigits_start < decpt && decpt <= vdigits_end);
1088
1089         /* Compute an upper bound how much memory we need. This might be a few
1090            chars too long, but no big deal. */
1091         bufsize =
1092                 /* sign, decimal point and trailing 0 byte */
1093                 3 +
1094
1095                 /* total digit count (including zero padding on both sides) */
1096                 (vdigits_end - vdigits_start) +
1097
1098                 /* exponent "e+100", max 3 numerical digits */
1099                 (use_exp ? 5 : 0);
1100
1101         /* Now allocate the memory and initialize p to point to the start of
1102            it. */
1103         buf = (char *)PyMem_Malloc(bufsize);
1104         if (buf == NULL) {
1105                 PyErr_NoMemory();
1106                 goto exit;
1107         }
1108         p = buf;
1109
1110         /* Add a negative sign if negative, and a plus sign if non-negative
1111            and always_add_sign is true. */
1112         if (sign == 1)
1113                 *p++ = '-';
1114         else if (always_add_sign)
1115                 *p++ = '+';
1116
1117         /* note that exactly one of the three 'if' conditions is true,
1118            so we include exactly one decimal point */
1119         /* Zero padding on left of digit string */
1120         if (decpt <= 0) {
1121                 memset(p, '0', decpt-vdigits_start);
1122                 p += decpt - vdigits_start;
1123                 *p++ = '.';
1124                 memset(p, '0', 0-decpt);
1125                 p += 0-decpt;
1126         }
1127         else {
1128                 memset(p, '0', 0-vdigits_start);
1129                 p += 0 - vdigits_start;
1130         }
1131
1132         /* Digits, with included decimal point */
1133         if (0 < decpt && decpt <= digits_len) {
1134                 strncpy(p, digits, decpt-0);
1135                 p += decpt-0;
1136                 *p++ = '.';
1137                 strncpy(p, digits+decpt, digits_len-decpt);
1138                 p += digits_len-decpt;
1139         }
1140         else {
1141                 strncpy(p, digits, digits_len);
1142                 p += digits_len;
1143         }
1144
1145         /* And zeros on the right */
1146         if (digits_len < decpt) {
1147                 memset(p, '0', decpt-digits_len);
1148                 p += decpt-digits_len;
1149                 *p++ = '.';
1150                 memset(p, '0', vdigits_end-decpt);
1151                 p += vdigits_end-decpt;
1152         }
1153         else {
1154                 memset(p, '0', vdigits_end-digits_len);
1155                 p += vdigits_end-digits_len;
1156         }
1157
1158         /* Delete a trailing decimal pt unless using alternative formatting. */
1159         if (p[-1] == '.' && !use_alt_formatting)
1160                 p--;
1161
1162         /* Now that we've done zero padding, add an exponent if needed. */
1163         if (use_exp) {
1164                 *p++ = float_strings[OFS_E][0];
1165                 exp_len = sprintf(p, "%+.02d", exp);
1166                 p += exp_len;
1167         }
1168   exit:
1169         if (buf) {
1170                 *p = '\0';
1171                 /* It's too late if this fails, as we've already stepped on
1172                    memory that isn't ours. But it's an okay debugging test. */
1173                 assert(p-buf < bufsize);
1174         }
1175         if (digits)
1176                 _Py_dg_freedtoa(digits);
1177
1178         return buf;
1179 }
1180
1181
1182 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1183                                          char format_code,
1184                                          int precision,
1185                                          int flags,
1186                                          int *type)
1187 {
1188         char **float_strings = lc_float_strings;
1189         int mode;
1190
1191         /* Validate format_code, and map upper and lower case. Compute the
1192            mode and make any adjustments as needed. */
1193         switch (format_code) {
1194         /* exponent */
1195         case 'E':
1196                 float_strings = uc_float_strings;
1197                 format_code = 'e';
1198                 /* Fall through. */
1199         case 'e':
1200                 mode = 2;
1201                 precision++;
1202                 break;
1203
1204         /* fixed */
1205         case 'F':
1206                 float_strings = uc_float_strings;
1207                 format_code = 'f';
1208                 /* Fall through. */
1209         case 'f':
1210                 mode = 3;
1211                 break;
1212
1213         /* general */
1214         case 'G':
1215                 float_strings = uc_float_strings;
1216                 format_code = 'g';
1217                 /* Fall through. */
1218         case 'g':
1219                 mode = 2;
1220                 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1221                 if (precision == 0)
1222                         precision = 1;
1223                 break;
1224
1225         /* repr format */
1226         case 'r':
1227                 mode = 0;
1228                 /* Supplied precision is unused, must be 0. */
1229                 if (precision != 0) {
1230                         PyErr_BadInternalCall();
1231                         return NULL;
1232                 }
1233                 break;
1234
1235         default:
1236                 PyErr_BadInternalCall();
1237                 return NULL;
1238         }
1239
1240         return format_float_short(val, format_code, mode, precision,
1241                                   flags & Py_DTSF_SIGN,
1242                                   flags & Py_DTSF_ADD_DOT_0,
1243                                   flags & Py_DTSF_ALT,
1244                                   float_strings, type);
1245 }
1246 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */