sysdeps/ieee754/ldbl-96/s_fmal.c

   1 /* Compute x * y + z as ternary operation.
   2    Copyright (C) 2010-2024 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #define NO_MATH_REDIRECT
  20 #include <float.h>
  21 #include <math.h>
  22 #include <fenv.h>
  23 #include <ieee754.h>
  24 #include <math-barriers.h>
  25 #include <libm-alias-ldouble.h>
  26 #include <tininess.h>
  27
  28 /* This implementation uses rounding to odd to avoid problems with
  29    double rounding.  See a paper by Boldo and Melquiond:
  30    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
  31
  32 long double
  33 __fmal (long double x, long double y, long double z)
  34 {
  35   union ieee854_long_double u, v, w;
  36   int adjust = 0;
  37   u.d = x;
  38   v.d = y;
  39   w.d = z;
  40   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
  41                         >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
  42                            - LDBL_MANT_DIG, 0)
  43       || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
  44       || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
  45       || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
  46       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
  47                            <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
  48     {
  49       /* If z is Inf, but x and y are finite, the result should be
  50          z rather than NaN.  */
  51       if (w.ieee.exponent == 0x7fff
  52           && u.ieee.exponent != 0x7fff
  53           && v.ieee.exponent != 0x7fff)
  54         return (z + x) + y;
  55       /* If z is zero and x are y are nonzero, compute the result
  56          as x * y to avoid the wrong sign of a zero result if x * y
  57          underflows to 0.  */
  58       if (z == 0 && x != 0 && y != 0)
  59         return x * y;
  60       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
  61          x * y + z.  */
  62       if (u.ieee.exponent == 0x7fff
  63           || v.ieee.exponent == 0x7fff
  64           || w.ieee.exponent == 0x7fff
  65           || x == 0
  66           || y == 0)
  67         return x * y + z;
  68       /* If fma will certainly overflow, compute as x * y.  */
  69       if (u.ieee.exponent + v.ieee.exponent
  70           > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
  71         return x * y;
  72       /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
  73          result nor whether there is underflow depends on its exact
  74          value, only on its sign.  */
  75       if (u.ieee.exponent + v.ieee.exponent
  76           < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
  77         {
  78           int neg = u.ieee.negative ^ v.ieee.negative;
  79           long double tiny = neg ? -0x1p-16445L : 0x1p-16445L;
  80           if (w.ieee.exponent >= 3)
  81             return tiny + z;
  82           /* Scaling up, adding TINY and scaling down produces the
  83              correct result, because in round-to-nearest mode adding
  84              TINY has no effect and in other modes double rounding is
  85              harmless.  But it may not produce required underflow
  86              exceptions.  */
  87           v.d = z * 0x1p65L + tiny;
  88           if (TININESS_AFTER_ROUNDING
  89               ? v.ieee.exponent < 66
  90               : (w.ieee.exponent == 0
  91                  || (w.ieee.exponent == 1
  92                      && w.ieee.negative != neg
  93                      && w.ieee.mantissa1 == 0
  94                      && w.ieee.mantissa0 == 0x80000000)))
  95             {
  96               long double force_underflow = x * y;
  97               math_force_eval (force_underflow);
  98             }
  99           return v.d * 0x1p-65L;
 100         }
 101       if (u.ieee.exponent + v.ieee.exponent
 102           >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
 103         {
 104           /* Compute 1p-64 times smaller result and multiply
 105              at the end.  */
 106           if (u.ieee.exponent > v.ieee.exponent)
 107             u.ieee.exponent -= LDBL_MANT_DIG;
 108           else
 109             v.ieee.exponent -= LDBL_MANT_DIG;
 110           /* If x + y exponent is very large and z exponent is very small,
 111              it doesn't matter if we don't adjust it.  */
 112           if (w.ieee.exponent > LDBL_MANT_DIG)
 113             w.ieee.exponent -= LDBL_MANT_DIG;
 114           adjust = 1;
 115         }
 116       else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 117         {
 118           /* Similarly.
 119              If z exponent is very large and x and y exponents are
 120              very small, adjust them up to avoid spurious underflows,
 121              rather than down.  */
 122           if (u.ieee.exponent + v.ieee.exponent
 123               <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
 124             {
 125               if (u.ieee.exponent > v.ieee.exponent)
 126                 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
 127               else
 128                 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
 129             }
 130           else if (u.ieee.exponent > v.ieee.exponent)
 131             {
 132               if (u.ieee.exponent > LDBL_MANT_DIG)
 133                 u.ieee.exponent -= LDBL_MANT_DIG;
 134             }
 135           else if (v.ieee.exponent > LDBL_MANT_DIG)
 136             v.ieee.exponent -= LDBL_MANT_DIG;
 137           w.ieee.exponent -= LDBL_MANT_DIG;
 138           adjust = 1;
 139         }
 140       else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 141         {
 142           u.ieee.exponent -= LDBL_MANT_DIG;
 143           if (v.ieee.exponent)
 144             v.ieee.exponent += LDBL_MANT_DIG;
 145           else
 146             v.d *= 0x1p64L;
 147         }
 148       else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 149         {
 150           v.ieee.exponent -= LDBL_MANT_DIG;
 151           if (u.ieee.exponent)
 152             u.ieee.exponent += LDBL_MANT_DIG;
 153           else
 154             u.d *= 0x1p64L;
 155         }
 156       else /* if (u.ieee.exponent + v.ieee.exponent
 157                   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
 158         {
 159           if (u.ieee.exponent > v.ieee.exponent)
 160             u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
 161           else
 162             v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
 163           if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
 164             {
 165               if (w.ieee.exponent)
 166                 w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
 167               else
 168                 w.d *= 0x1p130L;
 169               adjust = -1;
 170             }
 171           /* Otherwise x * y should just affect inexact
 172              and nothing else.  */
 173         }
 174       x = u.d;
 175       y = v.d;
 176       z = w.d;
 177     }
 178
 179   /* Ensure correct sign of exact 0 + 0.  */
 180   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
 181     {
 182       x = math_opt_barrier (x);
 183       return x * y + z;
 184     }
 185
 186   fenv_t env;
 187   feholdexcept (&env);
 188   fesetround (FE_TONEAREST);
 189
 190   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
 191 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
 192   long double x1 = x * C;
 193   long double y1 = y * C;
 194   long double m1 = x * y;
 195   x1 = (x - x1) + x1;
 196   y1 = (y - y1) + y1;
 197   long double x2 = x - x1;
 198   long double y2 = y - y1;
 199   long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 200
 201   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
 202   long double a1 = z + m1;
 203   long double t1 = a1 - z;
 204   long double t2 = a1 - t1;
 205   t1 = m1 - t1;
 206   t2 = z - t2;
 207   long double a2 = t1 + t2;
 208   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
 209   math_force_eval (m2);
 210   math_force_eval (a2);
 211   feclearexcept (FE_INEXACT);
 212
 213   /* If the result is an exact zero, ensure it has the correct sign.  */
 214   if (a1 == 0 && m2 == 0)
 215     {
 216       feupdateenv (&env);
 217       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
 218       z = math_opt_barrier (z);
 219       return z + m1;
 220     }
 221
 222   fesetround (FE_TOWARDZERO);
 223   /* Perform m2 + a2 addition with round to odd.  */
 224   u.d = a2 + m2;
 225
 226   if (__glibc_likely (adjust == 0))
 227     {
 228       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
 229         u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
 230       feupdateenv (&env);
 231       /* Result is a1 + u.d.  */
 232       return a1 + u.d;
 233     }
 234   else if (__glibc_likely (adjust > 0))
 235     {
 236       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
 237         u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
 238       feupdateenv (&env);
 239       /* Result is a1 + u.d, scaled up.  */
 240       return (a1 + u.d) * 0x1p64L;
 241     }
 242   else
 243     {
 244       if ((u.ieee.mantissa1 & 1) == 0)
 245         u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
 246       v.d = a1 + u.d;
 247       /* Ensure the addition is not scheduled after fetestexcept call.  */
 248       math_force_eval (v.d);
 249       int j = fetestexcept (FE_INEXACT) != 0;
 250       feupdateenv (&env);
 251       /* Ensure the following computations are performed in default rounding
 252          mode instead of just reusing the round to zero computation.  */
 253       asm volatile ("" : "=m" (u) : "m" (u));
 254       /* If a1 + u.d is exact, the only rounding happens during
 255          scaling down.  */
 256       if (j == 0)
 257         return v.d * 0x1p-130L;
 258       /* If result rounded to zero is not subnormal, no double
 259          rounding will occur.  */
 260       if (v.ieee.exponent > 130)
 261         return (a1 + u.d) * 0x1p-130L;
 262       /* If v.d * 0x1p-130L with round to zero is a subnormal above
 263          or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa
 264          down just by 1 bit, which means v.ieee.mantissa1 |= j would
 265          change the round bit, not sticky or guard bit.
 266          v.d * 0x1p-130L never normalizes by shifting up,
 267          so round bit plus sticky bit should be already enough
 268          for proper rounding.  */
 269       if (v.ieee.exponent == 130)
 270         {
 271           /* If the exponent would be in the normal range when
 272              rounding to normal precision with unbounded exponent
 273              range, the exact result is known and spurious underflows
 274              must be avoided on systems detecting tininess after
 275              rounding.  */
 276           if (TININESS_AFTER_ROUNDING)
 277             {
 278               w.d = a1 + u.d;
 279               if (w.ieee.exponent == 131)
 280                 return w.d * 0x1p-130L;
 281             }
 282           /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 283              v.ieee.mantissa1 & 1 is the round bit and j is our sticky
 284              bit.  */
 285           w.d = 0.0L;
 286           w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
 287           w.ieee.negative = v.ieee.negative;
 288           v.ieee.mantissa1 &= ~3U;
 289           v.d *= 0x1p-130L;
 290           w.d *= 0x1p-2L;
 291           return v.d + w.d;
 292         }
 293       v.ieee.mantissa1 |= j;
 294       return v.d * 0x1p-130L;
 295     }
 296 }
 297 libm_alias_ldouble (__fma, fma)