sysdeps/ieee754/dbl-64/s_fma.c

   1 /* Compute x * y + z as ternary operation.
   2    Copyright (C) 2010-2018 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 #include <float.h>
  21 #include <math.h>
  22 #include <fenv.h>
  23 #include <ieee754.h>
  24 #include <math_private.h>
  25 #include <libm-alias-double.h>
  26 #include <tininess.h>
  27
  28 /* This implementation uses rounding to odd to avoid problems with
  29    double rounding.  See a paper by Boldo and Melquiond:
  30    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
     /* Return x * y + z computed as a single ternary operation.

        Outline:
        1. If the exponent fields show that an operand, or the product
           x * y, is close to the overflow or underflow range, either
           return early or rescale the operands and record in ADJUST how
           the result has to be scaled back (0: not at all, 1: multiply
           by 0x1p53, -1: multiply by 0x1p-108).
        2. With rounding set to nearest, split x * y into m1 + m2 and
           z + m1 into a1 + a2.
        3. With rounding set toward zero, add the two error terms m2 and
           a2 and OR the inexact flag into the low mantissa bit of the
           sum (round to odd).
        4. Form a1 + (that sum) and undo the scaling.  The ADJUST < 0
           case needs extra care because the scaled-down result may be
           subnormal.  */
  31
  32 double
  33 __fma (double x, double y, double z)
  34 {
       /* u, v and w give access to the sign, exponent and mantissa
          fields of x, y and z respectively.  */
  35   union ieee754_double u, v, w;
       /* How the final result must be rescaled: 0 means no scaling,
          1 means multiply by 0x1p53, -1 means multiply by 0x1p-108.  */
  36   int adjust = 0;
  37   u.d = x;
  38   v.d = y;
  39   w.d = z;
       /* Unlikely slow path: taken when the biased exponent of x, y or z
          is within DBL_MANT_DIG of 0x7ff, or when the sum of the x and y
          exponents is close to the top or the bottom of the range.  */
  40   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
  41                         >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
  42       || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  43       || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  44       || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
  45       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
  46                            <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
  47     {
  48       /* If z is Inf, but x and y are finite, the result should be
  49          z rather than NaN.  */
  50       if (w.ieee.exponent == 0x7ff
  51           && u.ieee.exponent != 0x7ff
  52           && v.ieee.exponent != 0x7ff)
  53         return (z + x) + y;
  54       /* If z is zero and x and y are nonzero, compute the result
  55          as x * y to avoid the wrong sign of a zero result if x * y
  56          underflows to 0.  */
  57       if (z == 0 && x != 0 && y != 0)
  58         return x * y;
  59       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
  60          x * y + z.  */
  61       if (u.ieee.exponent == 0x7ff
  62           || v.ieee.exponent == 0x7ff
  63           || w.ieee.exponent == 0x7ff
  64           || x == 0
  65           || y == 0)
  66         return x * y + z;
  67       /* If fma will certainly overflow, compute as x * y.  */
  68       if (u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS)
  69         return x * y;
  70       /* If x * y is less than 1/4 of DBL_TRUE_MIN, neither the
  71          result nor whether there is underflow depends on its exact
  72          value, only on its sign.  */
  73       if (u.ieee.exponent + v.ieee.exponent
  74           < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
  75         {
               /* TINY stands in for x * y: it has the magnitude 0x1p-1074
                  and the sign of the product.  */
  76           int neg = u.ieee.negative ^ v.ieee.negative;
  77           double tiny = neg ? -0x1p-1074 : 0x1p-1074;
  78           if (w.ieee.exponent >= 3)
  79             return tiny + z;
  80           /* Scaling up, adding TINY and scaling down produces the
  81              correct result, because in round-to-nearest mode adding
  82              TINY has no effect and in other modes double rounding is
  83              harmless.  But it may not produce required underflow
  84              exceptions.  */
  85           v.d = z * 0x1p54 + tiny;
  86           if (TININESS_AFTER_ROUNDING
  87               ? v.ieee.exponent < 55
  88               : (w.ieee.exponent == 0
  89                  || (w.ieee.exponent == 1
  90                      && w.ieee.negative != neg
  91                      && w.ieee.mantissa1 == 0
  92                      && w.ieee.mantissa0 == 0)))
  93             {
                   /* The product is evaluated only for its side effect;
                      its value is discarded.  The variable name suggests
                      the intent is to raise the underflow exception.  */
  94               double force_underflow = x * y;
  95               math_force_eval (force_underflow);
  96             }
  97           return v.d * 0x1p-54;
  98         }
  99       if (u.ieee.exponent + v.ieee.exponent
 100           >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
 101         {
 102           /* Compute 1p-53 times smaller result and multiply
 103              at the end.  */
 104           if (u.ieee.exponent > v.ieee.exponent)
 105             u.ieee.exponent -= DBL_MANT_DIG;
 106           else
 107             v.ieee.exponent -= DBL_MANT_DIG;
 108           /* If the x * y exponent is very large and the z exponent is
 109              very small, it doesn't matter if we don't adjust it.  */
 110           if (w.ieee.exponent > DBL_MANT_DIG)
 111             w.ieee.exponent -= DBL_MANT_DIG;
 112           adjust = 1;
 113         }
 114       else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 115         {
 116           /* Similarly.
 117              If z exponent is very large and x and y exponents are
 118              very small, adjust them up to avoid spurious underflows,
 119              rather than down.  */
 120           if (u.ieee.exponent + v.ieee.exponent
 121               <= IEEE754_DOUBLE_BIAS + 2 * DBL_MANT_DIG)
 122             {
 123               if (u.ieee.exponent > v.ieee.exponent)
 124                 u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 125               else
 126                 v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 127             }
 128           else if (u.ieee.exponent > v.ieee.exponent)
 129             {
 130               if (u.ieee.exponent > DBL_MANT_DIG)
 131                 u.ieee.exponent -= DBL_MANT_DIG;
 132             }
 133           else if (v.ieee.exponent > DBL_MANT_DIG)
 134             v.ieee.exponent -= DBL_MANT_DIG;
 135           w.ieee.exponent -= DBL_MANT_DIG;
 136           adjust = 1;
 137         }
 138       else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 139         {
               /* x is large: scale x down and y up by the same factor of
                  0x1p53.  ADJUST is left at 0 in this branch.  When the
                  exponent field of y is 0 the scaling is done by a
                  multiplication instead of by editing the field.  */
 140           u.ieee.exponent -= DBL_MANT_DIG;
 141           if (v.ieee.exponent)
 142             v.ieee.exponent += DBL_MANT_DIG;
 143           else
 144             v.d *= 0x1p53;
 145         }
 146       else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 147         {
               /* Mirror image of the previous branch with the roles of x
                  and y exchanged.  */
 148           v.ieee.exponent -= DBL_MANT_DIG;
 149           if (u.ieee.exponent)
 150             u.ieee.exponent += DBL_MANT_DIG;
 151           else
 152             u.d *= 0x1p53;
 153         }
 154       else /* if (u.ieee.exponent + v.ieee.exponent
 155                   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
 156         {
               /* The product is small: scale the operand with the larger
                  exponent up by 2 * DBL_MANT_DIG + 2 binary places.  If z
                  is small as well it is scaled up by the same amount and
                  ADJUST is set to -1.  */
 157           if (u.ieee.exponent > v.ieee.exponent)
 158             u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 159           else
 160             v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 161           if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6)
 162             {
 163               if (w.ieee.exponent)
 164                 w.ieee.exponent += 2 * DBL_MANT_DIG + 2;
 165               else
 166                 w.d *= 0x1p108;
 167               adjust = -1;
 168             }
 169           /* Otherwise x * y should just affect inexact
 170              and nothing else.  */
 171         }
           /* Continue below with the rescaled operands.  */
 172       x = u.d;
 173       y = v.d;
 174       z = w.d;
 175     }
 176
 177   /* Ensure correct sign of exact 0 + 0.  */
 178   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
 179     {
           /* NOTE(review): math_opt_barrier is defined elsewhere;
              presumably it keeps the compiler from simplifying the
              expression below -- confirm.  */
 180       x = math_opt_barrier (x);
 181       return x * y + z;
 182     }
 183
       /* NOTE(review): libc_feholdexcept_setround is defined elsewhere;
          presumably it saves the current floating-point environment in
          ENV and selects round-to-nearest for the steps below --
          confirm.  */
 184   fenv_t env;
 185   libc_feholdexcept_setround (&env, FE_TONEAREST);
 186
 187   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
       /* C is the splitting constant, 2^27 + 1 when DBL_MANT_DIG is 53.
          x1/y1 become the high parts of x/y and x2/y2 the low parts;
          m1 is the rounded product and m2 its error term.  */
 188 #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
 189   double x1 = x * C;
 190   double y1 = y * C;
 191   double m1 = x * y;
 192   x1 = (x - x1) + x1;
 193   y1 = (y - y1) + y1;
 194   double x2 = x - x1;
 195   double y2 = y - y1;
 196   double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 197
 198   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
       /* a1 is the rounded sum and a2 its error term.  */
 199   double a1 = z + m1;
 200   double t1 = a1 - z;
 201   double t2 = a1 - t1;
 202   t1 = m1 - t1;
 203   t2 = z - t2;
 204   double a2 = t1 + t2;
 205   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
 206   math_force_eval (m2);
 207   math_force_eval (a2);
       /* Drop any inexact flag raised so far, so that the tests below only
          see inexactness from the additions that follow.  */
 208   feclearexcept (FE_INEXACT);
 209
 210   /* If the result is an exact zero, ensure it has the correct sign.  */
 211   if (a1 == 0 && m2 == 0)
 212     {
 213       libc_feupdateenv (&env);
 214       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
 215       z = math_opt_barrier (z);
 216       return z + m1;
 217     }
 218
 219   libc_fesetround (FE_TOWARDZERO);
 220
 221   /* Perform m2 + a2 addition with round to odd.  */
 222   u.d = a2 + m2;
 223
 224   if (__glibc_unlikely (adjust < 0))
 225     {
           /* In the scaled-up case the low bit is set from the inexact
              flag right away, and a1 + u.d is computed into v.d while
              the rounding mode is still toward zero.  */
 226       if ((u.ieee.mantissa1 & 1) == 0)
 227         u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
 228       v.d = a1 + u.d;
 229       /* Ensure the addition is not scheduled after fetestexcept call.  */
 230       math_force_eval (v.d);
 231     }
 232
 233   /* Reset rounding mode and test for inexact simultaneously.  */
       /* j is 1 if an addition since the feclearexcept call above was
          inexact, and 0 otherwise.  */
 234   int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
 235
 236   if (__glibc_likely (adjust == 0))
 237     {
           /* Round to odd: set the low bit if the sum was inexact.  The
              bit is left alone when the exponent field is 0x7ff
              (Inf/NaN).  */
 238       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
 239         u.ieee.mantissa1 |= j;
 240       /* Result is a1 + u.d.  */
 241       return a1 + u.d;
 242     }
 243   else if (__glibc_likely (adjust > 0))
 244     {
 245       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
 246         u.ieee.mantissa1 |= j;
 247       /* Result is a1 + u.d, scaled up.  */
 248       return (a1 + u.d) * 0x1p53;
 249     }
 250   else
 251     {
           /* adjust < 0: v.d holds a1 + u.d as computed above with
              rounding toward zero; the result must be scaled down by
              0x1p-108.  */
 252       /* If a1 + u.d is exact, the only rounding happens during
 253          scaling down.  */
 254       if (j == 0)
 255         return v.d * 0x1p-108;
 256       /* If result rounded to zero is not subnormal, no double
 257          rounding will occur.  */
 258       if (v.ieee.exponent > 108)
 259         return (a1 + u.d) * 0x1p-108;
 260       /* If v.d * 0x1p-108 with round to zero is a subnormal above
 261          or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa
 262          down just by 1 bit, which means v.ieee.mantissa1 |= j would
 263          change the round bit, not sticky or guard bit.
 264          v.d * 0x1p-108 never normalizes by shifting up,
 265          so round bit plus sticky bit should be already enough
 266          for proper rounding.  */
 267       if (v.ieee.exponent == 108)
 268         {
 269           /* If the exponent would be in the normal range when
 270              rounding to normal precision with unbounded exponent
 271              range, the exact result is known and spurious underflows
 272              must be avoided on systems detecting tininess after
 273              rounding.  */
 274           if (TININESS_AFTER_ROUNDING)
 275             {
 276               w.d = a1 + u.d;
 277               if (w.ieee.exponent == 109)
 278                 return w.d * 0x1p-108;
 279             }
 280           /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 281              v.ieee.mantissa1 & 1 is the round bit and j is our sticky
 282              bit.  */
               /* Move those three bits into w (with the sign of v), clear
                  the two low bits of v, scale both down and let the final
                  addition do the rounding.  */
 283           w.d = 0.0;
 284           w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
 285           w.ieee.negative = v.ieee.negative;
 286           v.ieee.mantissa1 &= ~3U;
 287           v.d *= 0x1p-108;
 288           w.d *= 0x1p-2;
 289           return v.d + w.d;
 290         }
           /* Otherwise fold the sticky bit into the low bit of v before
              scaling down.  */
 291       v.ieee.mantissa1 |= j;
 292       return v.d * 0x1p-108;
 293     }
 294 }
 295 #ifndef __fma
     /* This alias is skipped when __fma is defined as a macro.
        NOTE(review): libm_alias_double is not defined in this file;
        presumably it makes __fma available under the public name fma --
        confirm in <libm-alias-double.h>.  */
 296 libm_alias_double (__fma, fma)
 297 #endif