lib/strtod.c

   1 /* Copyright (C) 1991-1992, 1997, 1999, 2003, 2006, 2008-2024 Free Software
   2    Foundation, Inc.
   3
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation, either version 3 of the
   7    License, or (at your option) any later version.
   8
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 #if ! (defined USE_FLOAT || defined USE_LONG_DOUBLE)
  18 # include <config.h>
  19 #endif
  20
  21 /* Specification.  */
  22 #include <stdlib.h>
  23
  24 #include <ctype.h>      /* isspace() */
  25 #include <errno.h>
  26 #include <float.h>      /* {FLT,DBL,LDBL}_{MIN,MAX} */
  27 #include <limits.h>     /* LONG_{MIN,MAX} */
  28 #include <locale.h>     /* localeconv() */
  29 #include <math.h>       /* NAN */
  30 #include <stdio.h>      /* sprintf() */
  31 #include <string.h>     /* strdup() */
  32 #if HAVE_NL_LANGINFO
  33 # include <langinfo.h>
  34 #endif
  35
  36 #include "c-ctype.h"
  37
  38 #undef MIN
  39 #undef MAX
  40 #if defined USE_FLOAT
  41 # define STRTOD strtof
  42 # define LDEXP ldexpf
  43 # define HAVE_UNDERLYING_STRTOD HAVE_STRTOF
  44 # define DOUBLE float
  45 # define MIN FLT_MIN
  46 # define MAX FLT_MAX
  47 # define L_(literal) literal##f
  48 # if HAVE_LDEXPF_IN_LIBC
  49 #  define USE_LDEXP 1
  50 # else
  51 #  define USE_LDEXP 0
  52 # endif
  53 #elif defined USE_LONG_DOUBLE
  54 # define STRTOD strtold
  55 # define LDEXP ldexpl
  56 # if defined __hpux && defined __hppa
  57    /* We cannot call strtold on HP-UX/hppa, because its return type is a struct,
  58       not a 'long double'.  */
  59 #  define HAVE_UNDERLYING_STRTOD 0
  60 # elif STRTOLD_HAS_UNDERFLOW_BUG
  61    /* strtold would not set errno=ERANGE upon underflow.  */
  62 #  define HAVE_UNDERLYING_STRTOD 0
  63 # else
  64 #  define HAVE_UNDERLYING_STRTOD HAVE_STRTOLD
  65 # endif
  66 # define DOUBLE long double
  67 # define MIN LDBL_MIN
  68 # define MAX LDBL_MAX
  69 # define L_(literal) literal##L
  70 # if HAVE_LDEXPL_IN_LIBC
  71 #  define USE_LDEXP 1
  72 # else
  73 #  define USE_LDEXP 0
  74 # endif
  75 #else
  76 # define STRTOD strtod
  77 # define LDEXP ldexp
  78 # define HAVE_UNDERLYING_STRTOD 1
  79 # define DOUBLE double
  80 # define MIN DBL_MIN
  81 # define MAX DBL_MAX
  82 # define L_(literal) literal
  83 # if HAVE_LDEXP_IN_LIBC
  84 #  define USE_LDEXP 1
  85 # else
  86 #  define USE_LDEXP 0
  87 # endif
  88 #endif
  89
  90 /* Return true if C is a space in the current locale, avoiding
  91    problems with signed char and isspace.  */
  92 static bool
  93 locale_isspace (char c)
  94 {
  95   unsigned char uc = c;
  96   return isspace (uc) != 0;
  97 }
  98
  99 /* Determine the decimal-point character according to the current locale.  */
 100 static char
 101 decimal_point_char (void)
 102 {
 103   const char *point;
 104   /* Determine it in a multithread-safe way.  We know nl_langinfo is
 105      multithread-safe on glibc systems and Mac OS X systems, but is not required
 106      to be multithread-safe by POSIX.  sprintf(), however, is multithread-safe.
 107      localeconv() is rarely multithread-safe.  */
 108 #if HAVE_NL_LANGINFO && (__GLIBC__ || defined __UCLIBC__ || (defined __APPLE__ && defined __MACH__))
 109   point = nl_langinfo (RADIXCHAR);
 110 #elif 1
 111   char pointbuf[5];
 112   sprintf (pointbuf, "%#.0f", 1.0);
 113   point = &pointbuf[1];
 114 #else
 115   point = localeconv () -> decimal_point;
 116 #endif
 117   /* The decimal point is always a single byte: either '.' or ','.  */
 118   return (point[0] != '\0' ? point[0] : '.');
 119 }
 120
 121 #if !USE_LDEXP
 122  #undef LDEXP
 123  #define LDEXP dummy_ldexp
 124  /* A dummy definition that will never be invoked.  */
 125  static DOUBLE LDEXP (_GL_UNUSED DOUBLE x, _GL_UNUSED int exponent)
 126  {
 127    abort ();
 128    return L_(0.0);
 129  }
 130 #endif
 131
 132 /* Return X * BASE**EXPONENT.  Return an extreme value and set errno
 133    to ERANGE if underflow or overflow occurs.  */
 134 static DOUBLE
 135 scale_radix_exp (DOUBLE x, int radix, long int exponent)
 136 {
 137   /* If RADIX == 10, this code is neither precise nor fast; it is
 138      merely a straightforward and relatively portable approximation.
 139      If N == 2, this code is precise on a radix-2 implementation,
 140      albeit perhaps not fast if ldexp is not in libc.  */
 141
 142   long int e = exponent;
 143
 144   if (USE_LDEXP && radix == 2)
 145     return LDEXP (x, e < INT_MIN ? INT_MIN : INT_MAX < e ? INT_MAX : e);
 146   else
 147     {
 148       DOUBLE r = x;
 149
 150       if (r != 0)
 151         {
 152           if (e < 0)
 153             {
 154               while (e++ != 0)
 155                 {
 156                   r /= radix;
 157                   if (r == 0 && x != 0)
 158                     {
 159                       errno = ERANGE;
 160                       break;
 161                     }
 162                 }
 163             }
 164           else
 165             {
 166               while (e-- != 0)
 167                 {
 168                   if (r < -MAX / radix)
 169                     {
 170                       errno = ERANGE;
 171                       return -HUGE_VAL;
 172                     }
 173                   else if (MAX / radix < r)
 174                     {
 175                       errno = ERANGE;
 176                       return HUGE_VAL;
 177                     }
 178                   else
 179                     r *= radix;
 180                 }
 181             }
 182         }
 183
 184       return r;
 185     }
 186 }
 187
 188 /* Parse a number at NPTR; this is a bit like strtol (NPTR, ENDPTR)
 189    except there are no leading spaces or signs or "0x", and ENDPTR is
 190    nonnull.  The number uses a base BASE (either 10 or 16) fraction, a
 191    radix RADIX (either 10 or 2) exponent, and exponent character
 192    EXPCHAR.  BASE is RADIX**RADIX_MULTIPLIER.  */
 193 static DOUBLE
 194 parse_number (const char *nptr,
 195               int base, int radix, int radix_multiplier, char radixchar,
 196               char expchar,
 197               char **endptr)
 198 {
 199   const char *s = nptr;
 200   const char *digits_start;
 201   const char *digits_end;
 202   const char *radixchar_ptr;
 203   long int exponent;
 204   DOUBLE num;
 205
 206   /* First, determine the start and end of the digit sequence.  */
 207   digits_start = s;
 208   radixchar_ptr = NULL;
 209   for (;; ++s)
 210     {
 211       if (base == 16 ? c_isxdigit (*s) : c_isdigit (*s))
 212         ;
 213       else if (radixchar_ptr == NULL && *s == radixchar)
 214         {
 215           /* Record that we have found the decimal point.  */
 216           radixchar_ptr = s;
 217         }
 218       else
 219         /* Any other character terminates the digit sequence.  */
 220         break;
 221     }
 222   digits_end = s;
 223   /* Now radixchar_ptr == NULL or
 224      digits_start <= radixchar_ptr < digits_end.  */
 225
 226   if (false)
 227     { /* Unoptimized.  */
 228       exponent =
 229         (radixchar_ptr != NULL
 230          ? - (long int) (digits_end - radixchar_ptr - 1)
 231          : 0);
 232     }
 233   else
 234     { /* Remove trailing zero digits.  This reduces rounding errors for
 235          inputs such as 1.0000000000 or 10000000000e-10.  */
 236       while (digits_end > digits_start)
 237         {
 238           if (digits_end - 1 == radixchar_ptr || *(digits_end - 1) == '0')
 239             digits_end--;
 240           else
 241             break;
 242         }
 243       exponent =
 244         (radixchar_ptr != NULL
 245          ? (digits_end > radixchar_ptr
 246             ? - (long int) (digits_end - radixchar_ptr - 1)
 247             : (long int) (radixchar_ptr - digits_end))
 248          : (long int) (s - digits_end));
 249     }
 250
 251   /* Then, convert the digit sequence to a number.  */
 252   {
 253     const char *dp;
 254     num = 0;
 255     for (dp = digits_start; dp < digits_end; dp++)
 256       if (dp != radixchar_ptr)
 257         {
 258           int digit;
 259
 260           /* Make sure that multiplication by BASE will not overflow.  */
 261           if (!(num <= MAX / base))
 262             {
 263               /* The value of the digit and all subsequent digits don't matter,
 264                  since we have already gotten as many digits as can be
 265                  represented in a 'DOUBLE'.  This doesn't necessarily mean that
 266                  the result will overflow: The exponent may reduce it to within
 267                  range.  */
 268               exponent +=
 269                 (digits_end - dp)
 270                 - (radixchar_ptr >= dp && radixchar_ptr < digits_end ? 1 : 0);
 271               break;
 272             }
 273
 274           /* Eat the next digit.  */
 275           if (c_isdigit (*dp))
 276             digit = *dp - '0';
 277           else if (base == 16 && c_isxdigit (*dp))
 278             digit = c_tolower (*dp) - ('a' - 10);
 279           else
 280             abort ();
 281           num = num * base + digit;
 282         }
 283   }
 284
 285   exponent = exponent * radix_multiplier;
 286
 287   /* Finally, parse the exponent.  */
 288   if (c_tolower (*s) == expchar && ! locale_isspace (s[1]))
 289     {
 290       /* Add any given exponent to the implicit one.  */
 291       int saved_errno = errno;
 292       char *end;
 293       long int value = strtol (s + 1, &end, 10);
 294       errno = saved_errno;
 295
 296       if (s + 1 != end)
 297         {
 298           /* Skip past the exponent, and add in the implicit exponent,
 299              resulting in an extreme value on overflow.  */
 300           s = end;
 301           exponent =
 302             (exponent < 0
 303              ? (value < LONG_MIN - exponent ? LONG_MIN : exponent + value)
 304              : (LONG_MAX - exponent < value ? LONG_MAX : exponent + value));
 305         }
 306     }
 307
 308   *endptr = (char *) s;
 309   return scale_radix_exp (num, radix, exponent);
 310 }
 311
 312 /* HP cc on HP-UX 10.20 has a bug with the constant expression -0.0.
 313    ICC 10.0 has a bug when optimizing the expression -zero.
 314    The expression -MIN * MIN does not work when cross-compiling
 315    to PowerPC on Mac OS X 10.5.  */
 316 static DOUBLE
 317 minus_zero (void)
 318 {
 319 #if defined __hpux || defined __sgi || defined __ICC
 320   return -MIN * MIN;
 321 #else
 322   return -0.0;
 323 #endif
 324 }
 325
 326 /* Convert NPTR to a DOUBLE.  If ENDPTR is not NULL, a pointer to the
 327    character after the last one used in the number is put in *ENDPTR.  */
 328 DOUBLE
 329 STRTOD (const char *nptr, char **endptr)
 330 #if HAVE_UNDERLYING_STRTOD
 331 # if defined USE_FLOAT
 332 #  undef strtof
 333 # elif defined USE_LONG_DOUBLE
 334 #  undef strtold
 335 # else
 336 #  undef strtod
 337 # endif
 338 #else
 339 # undef STRTOD
 340 # define STRTOD(NPTR,ENDPTR) \
 341    parse_number (NPTR, 10, 10, 1, radixchar, 'e', ENDPTR)
 342 #endif
 343 /* From here on, STRTOD refers to the underlying implementation.  It needs
 344    to handle only finite unsigned decimal numbers with non-null ENDPTR.  */
 345 {
 346   char radixchar;
 347   bool negative = false;
 348
 349   /* The number so far.  */
 350   DOUBLE num;
 351
 352   const char *s = nptr;
 353   const char *end;
 354   char *endbuf;
 355   int saved_errno = errno;
 356
 357   radixchar = decimal_point_char ();
 358
 359   /* Eat whitespace.  */
 360   while (locale_isspace (*s))
 361     ++s;
 362
 363   /* Get the sign.  */
 364   negative = *s == '-';
 365   if (*s == '-' || *s == '+')
 366     ++s;
 367
 368   num = STRTOD (s, &endbuf);
 369   end = endbuf;
 370
 371   if (c_isdigit (s[*s == radixchar]))
 372     {
 373       /* If a hex float was converted incorrectly, do it ourselves.
 374          If the string starts with "0x" but does not contain digits,
 375          consume the "0" ourselves.  If a hex float is followed by a
 376          'p' but no exponent, then adjust the end pointer.  */
 377       if (*s == '0' && c_tolower (s[1]) == 'x')
 378         {
 379           if (! c_isxdigit (s[2 + (s[2] == radixchar)]))
 380             {
 381               end = s + 1;
 382
 383               /* strtod() on z/OS returns ERANGE for "0x".  */
 384               errno = saved_errno;
 385             }
 386           else if (end <= s + 2)
 387             {
 388               num = parse_number (s + 2, 16, 2, 4, radixchar, 'p', &endbuf);
 389               end = endbuf;
 390             }
 391           else
 392             {
 393               const char *p = s + 2;
 394               while (p < end && c_tolower (*p) != 'p')
 395                 p++;
 396               if (p < end && ! c_isdigit (p[1 + (p[1] == '-' || p[1] == '+')]))
 397                 {
 398                   char *dup = strdup (s);
 399                   errno = saved_errno;
 400                   if (!dup)
 401                     {
 402                       /* Not really our day, is it.  Rounding errors are
 403                          better than outright failure.  */
 404                       num =
 405                         parse_number (s + 2, 16, 2, 4, radixchar, 'p', &endbuf);
 406                     }
 407                   else
 408                     {
 409                       dup[p - s] = '\0';
 410                       num = STRTOD (dup, &endbuf);
 411                       saved_errno = errno;
 412                       free (dup);
 413                       errno = saved_errno;
 414                     }
 415                   end = p;
 416                 }
 417             }
 418         }
 419       else
 420         {
 421           /* If "1e 1" was misparsed as 10.0 instead of 1.0, re-do the
 422              underlying STRTOD on a copy of the original string
 423              truncated to avoid the bug.  */
 424           const char *e = s + 1;
 425           while (e < end && c_tolower (*e) != 'e')
 426             e++;
 427           if (e < end && ! c_isdigit (e[1 + (e[1] == '-' || e[1] == '+')]))
 428             {
 429               char *dup = strdup (s);
 430               errno = saved_errno;
 431               if (!dup)
 432                 {
 433                   /* Not really our day, is it.  Rounding errors are
 434                      better than outright failure.  */
 435                   num = parse_number (s, 10, 10, 1, radixchar, 'e', &endbuf);
 436                 }
 437               else
 438                 {
 439                   dup[e - s] = '\0';
 440                   num = STRTOD (dup, &endbuf);
 441                   saved_errno = errno;
 442                   free (dup);
 443                   errno = saved_errno;
 444                 }
 445               end = e;
 446             }
 447         }
 448
 449       s = end;
 450     }
 451
 452   /* Check for infinities and NaNs.  */
 453   else if (c_tolower (*s) == 'i'
 454            && c_tolower (s[1]) == 'n'
 455            && c_tolower (s[2]) == 'f')
 456     {
 457       s += 3;
 458       if (c_tolower (*s) == 'i'
 459           && c_tolower (s[1]) == 'n'
 460           && c_tolower (s[2]) == 'i'
 461           && c_tolower (s[3]) == 't'
 462           && c_tolower (s[4]) == 'y')
 463         s += 5;
 464       num = HUGE_VAL;
 465       errno = saved_errno;
 466     }
 467   else if (c_tolower (*s) == 'n'
 468            && c_tolower (s[1]) == 'a'
 469            && c_tolower (s[2]) == 'n')
 470     {
 471       s += 3;
 472       if (*s == '(')
 473         {
 474           const char *p = s + 1;
 475           while (c_isalnum (*p))
 476             p++;
 477           if (*p == ')')
 478             s = p + 1;
 479         }
 480
 481       /* If the underlying implementation misparsed the NaN, assume
 482          its result is incorrect, and return a NaN.  Normally it's
 483          better to use the underlying implementation's result, since a
 484          nice implementation populates the bits of the NaN according
 485          to interpreting n-char-sequence as a hexadecimal number.  */
 486       if (s != end || num == num)
 487         num = NAN;
 488       errno = saved_errno;
 489     }
 490   else
 491     {
 492       /* No conversion could be performed.  */
 493       errno = EINVAL;
 494       s = nptr;
 495     }
 496
 497   if (endptr != NULL)
 498     *endptr = (char *) s;
 499   /* Special case -0.0, since at least ICC miscompiles negation.  We
 500      can't use copysign(), as that drags in -lm on some platforms.  */
 501   if (!num && negative)
 502     return minus_zero ();
 503   return negative ? -num : num;
 504 }