lib/strtod.c

   1 /* Copyright (C) 1991-1992, 1997, 1999, 2003, 2006, 2008-2019 Free Software
   2    Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 #if ! defined USE_LONG_DOUBLE
  18 # include <config.h>
  19 #endif
  20
  21 /* Specification.  */
  22 #include <stdlib.h>
  23
  24 #include <ctype.h>      /* isspace() */
  25 #include <errno.h>
  26 #include <float.h>      /* {DBL,LDBL}_{MIN,MAX} */
  27 #include <limits.h>     /* LONG_{MIN,MAX} */
  28 #include <locale.h>     /* localeconv() */
  29 #include <math.h>       /* NAN */
  30 #include <stdbool.h>
  31 #include <stdio.h>      /* sprintf() */
  32 #include <string.h>     /* strdup() */
  33 #if HAVE_NL_LANGINFO
  34 # include <langinfo.h>
  35 #endif
  36
  37 #include "c-ctype.h"
  38
/* Select which flavour of the conversion function this translation unit
   builds.  With USE_LONG_DOUBLE defined, every macro below refers to the
   'long double' variant (strtold); otherwise to the 'double' variant
   (strtod).  MIN and MAX are undefined first so that any earlier
   definitions do not clash with the ones made here.  */
#undef MIN
#undef MAX
#ifdef USE_LONG_DOUBLE
/* STRTOD: name of the function being defined.
   LDEXP:  scaling function of the matching type.  */
# define STRTOD strtold
# define LDEXP ldexpl
# if defined __hpux && defined __hppa
   /* We cannot call strtold on HP-UX/hppa, because its return type is a struct,
      not a 'long double'.  */
#  define HAVE_UNDERLYING_STRTOD 0
# else
   /* Whether a system strtold exists to delegate to is taken from the
      HAVE_STRTOLD configuration macro.  */
#  define HAVE_UNDERLYING_STRTOD HAVE_STRTOLD
# endif
/* DOUBLE: the floating-point type being produced.
   MIN, MAX: that type's <float.h> limits.
   L_: gives a floating-point literal the 'L' suffix.  */
# define DOUBLE long double
# define MIN LDBL_MIN
# define MAX LDBL_MAX
# define L_(literal) literal##L
#else
/* Plain 'double' flavour; a system strtod is assumed to be available.  */
# define STRTOD strtod
# define LDEXP ldexp
# define HAVE_UNDERLYING_STRTOD 1
# define DOUBLE double
# define MIN DBL_MIN
# define MAX DBL_MAX
# define L_(literal) literal
#endif

/* USE_LDEXP is 1 when the configuration macro for the selected flavour is
   nonzero, and 0 otherwise.  It gates both the use of LDEXP for radix-2
   scaling and the dummy LDEXP replacement defined further down.
   NOTE(review): the long double variant is named ldexpl, so
   HAVE_LDEXPM_IN_LIBC looks like it may be a typo for HAVE_LDEXPL_IN_LIBC
   -- confirm against the macros the build configuration defines.  */
#if (defined USE_LONG_DOUBLE ? HAVE_LDEXPM_IN_LIBC : HAVE_LDEXP_IN_LIBC)
# define USE_LDEXP 1
#else
# define USE_LDEXP 0
#endif
  70
  71 /* Return true if C is a space in the current locale, avoiding
  72    problems with signed char and isspace.  */
  73 static bool
  74 locale_isspace (char c)
  75 {
  76   unsigned char uc = c;
  77   return isspace (uc) != 0;
  78 }
  79
  80 /* Determine the decimal-point character according to the current locale.  */
  81 static char
  82 decimal_point_char (void)
  83 {
  84   const char *point;
  85   /* Determine it in a multithread-safe way.  We know nl_langinfo is
  86      multithread-safe on glibc systems and Mac OS X systems, but is not required
  87      to be multithread-safe by POSIX.  sprintf(), however, is multithread-safe.
  88      localeconv() is rarely multithread-safe.  */
  89 #if HAVE_NL_LANGINFO && (__GLIBC__ || defined __UCLIBC__ || (defined __APPLE__ && defined __MACH__))
  90   point = nl_langinfo (RADIXCHAR);
  91 #elif 1
  92   char pointbuf[5];
  93   sprintf (pointbuf, "%#.0f", 1.0);
  94   point = &pointbuf[1];
  95 #else
  96   point = localeconv () -> decimal_point;
  97 #endif
  98   /* The decimal point is always a single byte: either '.' or ','.  */
  99   return (point[0] != '\0' ? point[0] : '.');
 100 }
 101
 102 #if !USE_LDEXP
 103  #undef LDEXP
 104  #define LDEXP dummy_ldexp
 105  /* A dummy definition that will never be invoked.  */
 106  static DOUBLE LDEXP (DOUBLE x _GL_UNUSED, int exponent _GL_UNUSED)
 107  {
 108    abort ();
 109    return L_(0.0);
 110  }
 111 #endif
 112
 113 /* Return X * BASE**EXPONENT.  Return an extreme value and set errno
 114    to ERANGE if underflow or overflow occurs.  */
 115 static DOUBLE
 116 scale_radix_exp (DOUBLE x, int radix, long int exponent)
 117 {
 118   /* If RADIX == 10, this code is neither precise nor fast; it is
 119      merely a straightforward and relatively portable approximation.
 120      If N == 2, this code is precise on a radix-2 implementation,
 121      albeit perhaps not fast if ldexp is not in libc.  */
 122
 123   long int e = exponent;
 124
 125   if (USE_LDEXP && radix == 2)
 126     return LDEXP (x, e < INT_MIN ? INT_MIN : INT_MAX < e ? INT_MAX : e);
 127   else
 128     {
 129       DOUBLE r = x;
 130
 131       if (r != 0)
 132         {
 133           if (e < 0)
 134             {
 135               while (e++ != 0)
 136                 {
 137                   r /= radix;
 138                   if (r == 0 && x != 0)
 139                     {
 140                       errno = ERANGE;
 141                       break;
 142                     }
 143                 }
 144             }
 145           else
 146             {
 147               while (e-- != 0)
 148                 {
 149                   if (r < -MAX / radix)
 150                     {
 151                       errno = ERANGE;
 152                       return -HUGE_VAL;
 153                     }
 154                   else if (MAX / radix < r)
 155                     {
 156                       errno = ERANGE;
 157                       return HUGE_VAL;
 158                     }
 159                   else
 160                     r *= radix;
 161                 }
 162             }
 163         }
 164
 165       return r;
 166     }
 167 }
 168
 169 /* Parse a number at NPTR; this is a bit like strtol (NPTR, ENDPTR)
 170    except there are no leading spaces or signs or "0x", and ENDPTR is
 171    nonnull.  The number uses a base BASE (either 10 or 16) fraction, a
 172    radix RADIX (either 10 or 2) exponent, and exponent character
 173    EXPCHAR.  BASE is RADIX**RADIX_MULTIPLIER.  */
 174 static DOUBLE
 175 parse_number (const char *nptr,
 176               int base, int radix, int radix_multiplier, char radixchar,
 177               char expchar,
 178               char **endptr)
 179 {
 180   const char *s = nptr;
 181   const char *digits_start;
 182   const char *digits_end;
 183   const char *radixchar_ptr;
 184   long int exponent;
 185   DOUBLE num;
 186
 187   /* First, determine the start and end of the digit sequence.  */
 188   digits_start = s;
 189   radixchar_ptr = NULL;
 190   for (;; ++s)
 191     {
 192       if (base == 16 ? c_isxdigit (*s) : c_isdigit (*s))
 193         ;
 194       else if (radixchar_ptr == NULL && *s == radixchar)
 195         {
 196           /* Record that we have found the decimal point.  */
 197           radixchar_ptr = s;
 198         }
 199       else
 200         /* Any other character terminates the digit sequence.  */
 201         break;
 202     }
 203   digits_end = s;
 204   /* Now radixchar_ptr == NULL or
 205      digits_start <= radixchar_ptr < digits_end.  */
 206
 207   if (false)
 208     { /* Unoptimized.  */
 209       exponent =
 210         (radixchar_ptr != NULL
 211          ? - (long int) (digits_end - radixchar_ptr - 1)
 212          : 0);
 213     }
 214   else
 215     { /* Remove trailing zero digits.  This reduces rounding errors for
 216          inputs such as 1.0000000000 or 10000000000e-10.  */
 217       while (digits_end > digits_start)
 218         {
 219           if (digits_end - 1 == radixchar_ptr || *(digits_end - 1) == '0')
 220             digits_end--;
 221           else
 222             break;
 223         }
 224       exponent =
 225         (radixchar_ptr != NULL
 226          ? (digits_end > radixchar_ptr
 227             ? - (long int) (digits_end - radixchar_ptr - 1)
 228             : (long int) (radixchar_ptr - digits_end))
 229          : (long int) (s - digits_end));
 230     }
 231
 232   /* Then, convert the digit sequence to a number.  */
 233   {
 234     const char *dp;
 235     num = 0;
 236     for (dp = digits_start; dp < digits_end; dp++)
 237       if (dp != radixchar_ptr)
 238         {
 239           int digit;
 240
 241           /* Make sure that multiplication by BASE will not overflow.  */
 242           if (!(num <= MAX / base))
 243             {
 244               /* The value of the digit and all subsequent digits don't matter,
 245                  since we have already gotten as many digits as can be
 246                  represented in a 'DOUBLE'.  This doesn't necessarily mean that
 247                  the result will overflow: The exponent may reduce it to within
 248                  range.  */
 249               exponent +=
 250                 (digits_end - dp)
 251                 - (radixchar_ptr >= dp && radixchar_ptr < digits_end ? 1 : 0);
 252               break;
 253             }
 254
 255           /* Eat the next digit.  */
 256           if (c_isdigit (*dp))
 257             digit = *dp - '0';
 258           else if (base == 16 && c_isxdigit (*dp))
 259             digit = c_tolower (*dp) - ('a' - 10);
 260           else
 261             abort ();
 262           num = num * base + digit;
 263         }
 264   }
 265
 266   exponent = exponent * radix_multiplier;
 267
 268   /* Finally, parse the exponent.  */
 269   if (c_tolower (*s) == expchar && ! locale_isspace (s[1]))
 270     {
 271       /* Add any given exponent to the implicit one.  */
 272       int saved_errno = errno;
 273       char *end;
 274       long int value = strtol (s + 1, &end, 10);
 275       errno = saved_errno;
 276
 277       if (s + 1 != end)
 278         {
 279           /* Skip past the exponent, and add in the implicit exponent,
 280              resulting in an extreme value on overflow.  */
 281           s = end;
 282           exponent =
 283             (exponent < 0
 284              ? (value < LONG_MIN - exponent ? LONG_MIN : exponent + value)
 285              : (LONG_MAX - exponent < value ? LONG_MAX : exponent + value));
 286         }
 287     }
 288
 289   *endptr = (char *) s;
 290   return scale_radix_exp (num, radix, exponent);
 291 }
 292
 293 /* HP cc on HP-UX 10.20 has a bug with the constant expression -0.0.
 294    ICC 10.0 has a bug when optimizing the expression -zero.
 295    The expression -MIN * MIN does not work when cross-compiling
 296    to PowerPC on Mac OS X 10.5.  */
 297 #if defined __hpux || defined __sgi || defined __ICC
 298 static DOUBLE
 299 compute_minus_zero (void)
 300 {
 301   return -MIN * MIN;
 302 }
 303 # define minus_zero compute_minus_zero ()
 304 #else
 305 DOUBLE minus_zero = -0.0;
 306 #endif
 307
 308 /* Convert NPTR to a DOUBLE.  If ENDPTR is not NULL, a pointer to the
 309    character after the last one used in the number is put in *ENDPTR.  */
 310 DOUBLE
 311 STRTOD (const char *nptr, char **endptr)
 312 #if HAVE_UNDERLYING_STRTOD
 313 # ifdef USE_LONG_DOUBLE
 314 #  undef strtold
 315 # else
 316 #  undef strtod
 317 # endif
 318 #else
 319 # undef STRTOD
 320 # define STRTOD(NPTR,ENDPTR) \
 321    parse_number (NPTR, 10, 10, 1, radixchar, 'e', ENDPTR)
 322 #endif
 323 /* From here on, STRTOD refers to the underlying implementation.  It needs
 324    to handle only finite unsigned decimal numbers with non-null ENDPTR.  */
 325 {
 326   char radixchar;
 327   bool negative = false;
 328
 329   /* The number so far.  */
 330   DOUBLE num;
 331
 332   const char *s = nptr;
 333   const char *end;
 334   char *endbuf;
 335   int saved_errno = errno;
 336
 337   radixchar = decimal_point_char ();
 338
 339   /* Eat whitespace.  */
 340   while (locale_isspace (*s))
 341     ++s;
 342
 343   /* Get the sign.  */
 344   negative = *s == '-';
 345   if (*s == '-' || *s == '+')
 346     ++s;
 347
 348   num = STRTOD (s, &endbuf);
 349   end = endbuf;
 350
 351   if (c_isdigit (s[*s == radixchar]))
 352     {
 353       /* If a hex float was converted incorrectly, do it ourselves.
 354          If the string starts with "0x" but does not contain digits,
 355          consume the "0" ourselves.  If a hex float is followed by a
 356          'p' but no exponent, then adjust the end pointer.  */
 357       if (*s == '0' && c_tolower (s[1]) == 'x')
 358         {
 359           if (! c_isxdigit (s[2 + (s[2] == radixchar)]))
 360             {
 361               end = s + 1;
 362
 363               /* strtod() on z/OS returns ERANGE for "0x".  */
 364               errno = saved_errno;
 365             }
 366           else if (end <= s + 2)
 367             {
 368               num = parse_number (s + 2, 16, 2, 4, radixchar, 'p', &endbuf);
 369               end = endbuf;
 370             }
 371           else
 372             {
 373               const char *p = s + 2;
 374               while (p < end && c_tolower (*p) != 'p')
 375                 p++;
 376               if (p < end && ! c_isdigit (p[1 + (p[1] == '-' || p[1] == '+')]))
 377                 {
 378                   char *dup = strdup (s);
 379                   errno = saved_errno;
 380                   if (!dup)
 381                     {
 382                       /* Not really our day, is it.  Rounding errors are
 383                          better than outright failure.  */
 384                       num =
 385                         parse_number (s + 2, 16, 2, 4, radixchar, 'p', &endbuf);
 386                     }
 387                   else
 388                     {
 389                       dup[p - s] = '\0';
 390                       num = STRTOD (dup, &endbuf);
 391                       saved_errno = errno;
 392                       free (dup);
 393                       errno = saved_errno;
 394                     }
 395                   end = p;
 396                 }
 397             }
 398         }
 399       else
 400         {
 401           /* If "1e 1" was misparsed as 10.0 instead of 1.0, re-do the
 402              underlying STRTOD on a copy of the original string
 403              truncated to avoid the bug.  */
 404           const char *e = s + 1;
 405           while (e < end && c_tolower (*e) != 'e')
 406             e++;
 407           if (e < end && ! c_isdigit (e[1 + (e[1] == '-' || e[1] == '+')]))
 408             {
 409               char *dup = strdup (s);
 410               errno = saved_errno;
 411               if (!dup)
 412                 {
 413                   /* Not really our day, is it.  Rounding errors are
 414                      better than outright failure.  */
 415                   num = parse_number (s, 10, 10, 1, radixchar, 'e', &endbuf);
 416                 }
 417               else
 418                 {
 419                   dup[e - s] = '\0';
 420                   num = STRTOD (dup, &endbuf);
 421                   saved_errno = errno;
 422                   free (dup);
 423                   errno = saved_errno;
 424                 }
 425               end = e;
 426             }
 427         }
 428
 429       s = end;
 430     }
 431
 432   /* Check for infinities and NaNs.  */
 433   else if (c_tolower (*s) == 'i'
 434            && c_tolower (s[1]) == 'n'
 435            && c_tolower (s[2]) == 'f')
 436     {
 437       s += 3;
 438       if (c_tolower (*s) == 'i'
 439           && c_tolower (s[1]) == 'n'
 440           && c_tolower (s[2]) == 'i'
 441           && c_tolower (s[3]) == 't'
 442           && c_tolower (s[4]) == 'y')
 443         s += 5;
 444       num = HUGE_VAL;
 445       errno = saved_errno;
 446     }
 447   else if (c_tolower (*s) == 'n'
 448            && c_tolower (s[1]) == 'a'
 449            && c_tolower (s[2]) == 'n')
 450     {
 451       s += 3;
 452       if (*s == '(')
 453         {
 454           const char *p = s + 1;
 455           while (c_isalnum (*p))
 456             p++;
 457           if (*p == ')')
 458             s = p + 1;
 459         }
 460
 461       /* If the underlying implementation misparsed the NaN, assume
 462          its result is incorrect, and return a NaN.  Normally it's
 463          better to use the underlying implementation's result, since a
 464          nice implementation populates the bits of the NaN according
 465          to interpreting n-char-sequence as a hexadecimal number.  */
 466       if (s != end || num == num)
 467         num = NAN;
 468       errno = saved_errno;
 469     }
 470   else
 471     {
 472       /* No conversion could be performed.  */
 473       errno = EINVAL;
 474       s = nptr;
 475     }
 476
 477   if (endptr != NULL)
 478     *endptr = (char *) s;
 479   /* Special case -0.0, since at least ICC miscompiles negation.  We
 480      can't use copysign(), as that drags in -lm on some platforms.  */
 481   if (!num && negative)
 482     return minus_zero;
 483   return negative ? -num : num;
 484 }