sysdeps/ieee754/dbl-64/s_fma.c

   1 /* Compute x * y + z as ternary operation.
   2    Copyright (C) 2010-2024 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #define NO_MATH_REDIRECT
  20 #include <float.h>
  21 #define dfmal __hide_dfmal
  22 #define f32xfmaf64 __hide_f32xfmaf64
  23 #include <math.h>
  24 #undef dfmal
  25 #undef f32xfmaf64
  26 #include <fenv.h>
  27 #include <ieee754.h>
  28 #include <math-barriers.h>
  29 #include <fenv_private.h>
  30 #include <libm-alias-double.h>
  31 #include <math-narrow-alias.h>
  32 #include <tininess.h>
  33 #include <math-use-builtins.h>
  34
  35 /* This implementation uses rounding to odd to avoid problems with
  36    double rounding.  See a paper by Boldo and Melquiond:
  37    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
  38
  39 double
  40 __fma (double x, double y, double z)
  41 {
  42 #if USE_FMA_BUILTIN
  43   return __builtin_fma (x, y, z);
  44 #else
  45   /* Use generic implementation.  */
  46   union ieee754_double u, v, w;
  47   int adjust = 0;
  48   u.d = x;
  49   v.d = y;
  50   w.d = z;
  51   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
  52                         >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
  53       || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  54       || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  55       || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  56       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
  57                            <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
  58     {
  59       /* If z is Inf, but x and y are finite, the result should be
  60          z rather than NaN.  */
  61       if (w.ieee.exponent == 0x7ff
  62           && u.ieee.exponent != 0x7ff
  63           && v.ieee.exponent != 0x7ff)
  64         return (z + x) + y;
  65       /* If z is zero and x are y are nonzero, compute the result
  66          as x * y to avoid the wrong sign of a zero result if x * y
  67          underflows to 0.  */
  68       if (z == 0 && x != 0 && y != 0)
  69         return x * y;
  70       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
  71          x * y + z.  */
  72       if (u.ieee.exponent == 0x7ff
  73           || v.ieee.exponent == 0x7ff
  74           || w.ieee.exponent == 0x7ff
  75           || x == 0
  76           || y == 0)
  77         return x * y + z;
  78       /* If fma will certainly overflow, compute as x * y.  */
  79       if (u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS)
  80         return x * y;
  81       /* If x * y is less than 1/4 of DBL_TRUE_MIN, neither the
  82          result nor whether there is underflow depends on its exact
  83          value, only on its sign.  */
  84       if (u.ieee.exponent + v.ieee.exponent
  85           < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
  86         {
  87           int neg = u.ieee.negative ^ v.ieee.negative;
  88           double tiny = neg ? -0x1p-1074 : 0x1p-1074;
  89           if (w.ieee.exponent >= 3)
  90             return tiny + z;
  91           /* Scaling up, adding TINY and scaling down produces the
  92              correct result, because in round-to-nearest mode adding
  93              TINY has no effect and in other modes double rounding is
  94              harmless.  But it may not produce required underflow
  95              exceptions.  */
  96           v.d = z * 0x1p54 + tiny;
  97           if (TININESS_AFTER_ROUNDING
  98               ? v.ieee.exponent < 55
  99               : (w.ieee.exponent == 0
 100                  || (w.ieee.exponent == 1
 101                      && w.ieee.negative != neg
 102                      && w.ieee.mantissa1 == 0
 103                      && w.ieee.mantissa0 == 0)))
 104             {
 105               double force_underflow = x * y;
 106               math_force_eval (force_underflow);
 107             }
 108           return v.d * 0x1p-54;
 109         }
 110       if (u.ieee.exponent + v.ieee.exponent
 111           >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
 112         {
 113           /* Compute 1p-53 times smaller result and multiply
 114              at the end.  */
 115           if (u.ieee.exponent > v.ieee.exponent)
 116             u.ieee.exponent -= DBL_MANT_DIG;
 117           else
 118             v.ieee.exponent -= DBL_MANT_DIG;
 119           /* If x + y exponent is very large and z exponent is very small,
 120              it doesn't matter if we don't adjust it.  */
 121           if (w.ieee.exponent > DBL_MANT_DIG)
 122             w.ieee.exponent -= DBL_MANT_DIG;
 123           adjust = 1;
 124         }
 125       else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 126         {
 127           /* Similarly.
 128              If z exponent is very large and x and y exponents are
 129              very small, adjust them up to avoid spurious underflows,
 130              rather than down.  */
 131           if (u.ieee.exponent + v.ieee.exponent
 132               <= IEEE754_DOUBLE_BIAS + 2 * DBL_MANT_DIG)
 133             {
 134               if (u.ieee.exponent > v.ieee.exponent)
 135                 u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 136               else
 137                 v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 138             }
 139           else if (u.ieee.exponent > v.ieee.exponent)
 140             {
 141               if (u.ieee.exponent > DBL_MANT_DIG)
 142                 u.ieee.exponent -= DBL_MANT_DIG;
 143             }
 144           else if (v.ieee.exponent > DBL_MANT_DIG)
 145             v.ieee.exponent -= DBL_MANT_DIG;
 146           w.ieee.exponent -= DBL_MANT_DIG;
 147           adjust = 1;
 148         }
 149       else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 150         {
 151           u.ieee.exponent -= DBL_MANT_DIG;
 152           if (v.ieee.exponent)
 153             v.ieee.exponent += DBL_MANT_DIG;
 154           else
 155             v.d *= 0x1p53;
 156         }
 157       else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 158         {
 159           v.ieee.exponent -= DBL_MANT_DIG;
 160           if (u.ieee.exponent)
 161             u.ieee.exponent += DBL_MANT_DIG;
 162           else
 163             u.d *= 0x1p53;
 164         }
 165       else /* if (u.ieee.exponent + v.ieee.exponent
 166                   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
 167         {
 168           if (u.ieee.exponent > v.ieee.exponent)
 169             u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 170           else
 171             v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 172           if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6)
 173             {
 174               if (w.ieee.exponent)
 175                 w.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 176               else
 177                 w.d *= 0x1p108;
 178               adjust = -1;
 179             }
 180           /* Otherwise x * y should just affect inexact
 181              and nothing else.  */
 182         }
 183       x = u.d;
 184       y = v.d;
 185       z = w.d;
 186     }
 187
 188   /* Ensure correct sign of exact 0 + 0.  */
 189   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
 190     {
 191       x = math_opt_barrier (x);
 192       return x * y + z;
 193     }
 194
 195   fenv_t env;
 196   libc_feholdexcept_setround (&env, FE_TONEAREST);
 197
 198   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
 199 #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
 200   double x1 = x * C;
 201   double y1 = y * C;
 202   double m1 = x * y;
 203   x1 = (x - x1) + x1;
 204   y1 = (y - y1) + y1;
 205   double x2 = x - x1;
 206   double y2 = y - y1;
 207   double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 208
 209   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
 210   double a1 = z + m1;
 211   double t1 = a1 - z;
 212   double t2 = a1 - t1;
 213   t1 = m1 - t1;
 214   t2 = z - t2;
 215   double a2 = t1 + t2;
 216   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
 217   math_force_eval (m2);
 218   math_force_eval (a2);
 219   feclearexcept (FE_INEXACT);
 220
 221   /* If the result is an exact zero, ensure it has the correct sign.  */
 222   if (a1 == 0 && m2 == 0)
 223     {
 224       libc_feupdateenv (&env);
 225       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
 226       z = math_opt_barrier (z);
 227       return z + m1;
 228     }
 229
 230   libc_fesetround (FE_TOWARDZERO);
 231
 232   /* Perform m2 + a2 addition with round to odd.  */
 233   u.d = a2 + m2;
 234
 235   if (__glibc_unlikely (adjust < 0))
 236     {
 237       if ((u.ieee.mantissa1 & 1) == 0)
 238         u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
 239       v.d = a1 + u.d;
 240       /* Ensure the addition is not scheduled after fetestexcept call.  */
 241       math_force_eval (v.d);
 242     }
 243
 244   /* Reset rounding mode and test for inexact simultaneously.  */
 245   int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
 246
 247   if (__glibc_likely (adjust == 0))
 248     {
 249       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
 250         u.ieee.mantissa1 |= j;
 251       /* Result is a1 + u.d.  */
 252       return a1 + u.d;
 253     }
 254   else if (__glibc_likely (adjust > 0))
 255     {
 256       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
 257         u.ieee.mantissa1 |= j;
 258       /* Result is a1 + u.d, scaled up.  */
 259       return (a1 + u.d) * 0x1p53;
 260     }
 261   else
 262     {
 263       /* If a1 + u.d is exact, the only rounding happens during
 264          scaling down.  */
 265       if (j == 0)
 266         return v.d * 0x1p-108;
 267       /* If result rounded to zero is not subnormal, no double
 268          rounding will occur.  */
 269       if (v.ieee.exponent > 108)
 270         return (a1 + u.d) * 0x1p-108;
 271       /* If v.d * 0x1p-108 with round to zero is a subnormal above
 272          or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa
 273          down just by 1 bit, which means v.ieee.mantissa1 |= j would
 274          change the round bit, not sticky or guard bit.
 275          v.d * 0x1p-108 never normalizes by shifting up,
 276          so round bit plus sticky bit should be already enough
 277          for proper rounding.  */
 278       if (v.ieee.exponent == 108)
 279         {
 280           /* If the exponent would be in the normal range when
 281              rounding to normal precision with unbounded exponent
 282              range, the exact result is known and spurious underflows
 283              must be avoided on systems detecting tininess after
 284              rounding.  */
 285           if (TININESS_AFTER_ROUNDING)
 286             {
 287               w.d = a1 + u.d;
 288               if (w.ieee.exponent == 109)
 289                 return w.d * 0x1p-108;
 290             }
 291           /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 292              v.ieee.mantissa1 & 1 is the round bit and j is our sticky
 293              bit.  */
 294           w.d = 0.0;
 295           w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
 296           w.ieee.negative = v.ieee.negative;
 297           v.ieee.mantissa1 &= ~3U;
 298           v.d *= 0x1p-108;
 299           w.d *= 0x1p-2;
 300           return v.d + w.d;
 301         }
 302       v.ieee.mantissa1 |= j;
 303       return v.d * 0x1p-108;
 304     }
 305 #endif /* ! USE_FMA_BUILTIN  */
 306 }
 307 #ifndef __fma
 308 libm_alias_double (__fma, fma)
 309 libm_alias_double_narrow (__fma, fma)
 310 #endif