sysdeps/ieee754/ldbl-128/s_fmal.c

   1 /* Compute x * y + z as ternary operation.
   2    Copyright (C) 2010 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 #include <float.h>
  21 #include <math.h>
  22 #include <fenv.h>
  23 #include <ieee754.h>
  24
  25 /* This implementation uses rounding to odd to avoid problems with
  26    double rounding.  See a paper by Boldo and Melquiond:
  27    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
  28
     /* Return x * y + z.

        The literal constants used below (0x7fff, 0x1p113L, 0x1p226L,
        0x1p-16494L) assume the long double format with a 15-bit exponent
        field and LDBL_MANT_DIG == 113.

        Outline of the code below:
        - When an operand's exponent field, or the sum of the x and y
          exponent fields, is close to the limits of the format, either
          return a directly computed result or rescale the operands by a
          power of two.  `adjust' records how the result must be scaled
          back: 0 (no scaling), 1 (multiply by 0x1p113L) or -1 (multiply
          by 0x1p-226L).
        - x * y is split into m1 + m2 and z + m1 into a1 + a2.
        - a2 + m2 is added while rounding toward zero, and the lowest
          mantissa bit of the sum is set if FE_INEXACT was raised
          (round to odd).
        - For adjust >= 0 the final a1 + u.d is evaluated after the saved
          floating-point environment has been restored.  For adjust < 0
          it is evaluated while still rounding toward zero, and the last
          rounding step is handled explicitly around the scale-down.  */
  29 long double
  30 __fmal (long double x, long double y, long double z)
  31 {
       /* u, v and w give bit-field access to x, y and z respectively.  */
  32   union ieee854_long_double u, v, w;
       /* Scaling still to be applied to the result: 0 none,
          1 multiply by 0x1p113L, -1 multiply by 0x1p-226L.  */
  33   int adjust = 0;
  34   u.d = x;
  35   v.d = y;
  36   w.d = z;
       /* Slow path, marked unlikely.  It is entered when any of these holds:
          - the sum of the x and y exponent fields is within LDBL_MANT_DIG
            of 0x7fff + IEEE854_LONG_DOUBLE_BIAS (product near overflow);
          - the exponent field of x, y or z is within LDBL_MANT_DIG of
            0x7fff (0x7fff itself is tested for below as Inf/NaN);
          - the sum of the x and y exponent fields is at most
            IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG (small product;
            presumably because the low part of the product could then
            fall into the subnormal range -- TODO confirm).  */
  37   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
  38                         >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
  39                            - LDBL_MANT_DIG, 0)
  40       || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
  41       || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
  42       || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
  43       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
  44                            <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
  45     {
  46       /* If z is Inf, but x and y are finite, the result should be
  47          z rather than NaN.  */
           /* x * y could overflow to an infinity of the opposite sign, and
              adding that to z would give NaN.  (z + x) + y never forms the
              product; with finite x and y it yields z for an infinite z.
              A z with exponent field 0x7fff that is a NaN also takes this
              path.  */
  48       if (w.ieee.exponent == 0x7fff
  49           && u.ieee.exponent != 0x7fff
  50           && v.ieee.exponent != 0x7fff)
  51         return (z + x) + y;
  52       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
  53          or if x * y is less than half of LDBL_DENORM_MIN,
  54          compute as x * y + z.  */
  55       if (u.ieee.exponent == 0x7fff
  56           || v.ieee.exponent == 0x7fff
  57           || w.ieee.exponent == 0x7fff
  58           || u.ieee.exponent + v.ieee.exponent
  59              > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
  60           || u.ieee.exponent + v.ieee.exponent
  61              < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
  62         return x * y + z;
  63       if (u.ieee.exponent + v.ieee.exponent
  64           >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
  65         {
  66           /* Compute 1p-113 times smaller result and multiply
  67              at the end.  */
               /* The factor with the larger exponent field is the one
                  that is scaled down.  */
  68           if (u.ieee.exponent > v.ieee.exponent)
  69             u.ieee.exponent -= LDBL_MANT_DIG;
  70           else
  71             v.ieee.exponent -= LDBL_MANT_DIG;
  72           /* If x * y exponent is very large and z exponent is very small,
  73              it doesn't matter if we don't adjust it.  */
  74           if (w.ieee.exponent > LDBL_MANT_DIG)
  75             w.ieee.exponent -= LDBL_MANT_DIG;
  76           adjust = 1;
  77         }
  78       else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
  79         {
  80           /* Similarly.
  81              If z exponent is very large and x and y exponents are
  82              very small, it doesn't matter if we don't adjust it.  */
  83           if (u.ieee.exponent > v.ieee.exponent)
  84             {
  85               if (u.ieee.exponent > LDBL_MANT_DIG)
  86                 u.ieee.exponent -= LDBL_MANT_DIG;
  87             }
  88           else if (v.ieee.exponent > LDBL_MANT_DIG)
  89             v.ieee.exponent -= LDBL_MANT_DIG;
  90           w.ieee.exponent -= LDBL_MANT_DIG;
  91           adjust = 1;
  92         }
  93       else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
  94         {
               /* x is large, but the product is not near overflow and z is
                  not large (the earlier branches were not taken).  Move
                  LDBL_MANT_DIG from the exponent of x to the exponent of y;
                  the product is unchanged, so adjust stays 0.  A zero
                  exponent field in y (zero or subnormal) cannot simply be
                  incremented, so y is multiplied by 0x1p113L instead.  */
  95           u.ieee.exponent -= LDBL_MANT_DIG;
  96           if (v.ieee.exponent)
  97             v.ieee.exponent += LDBL_MANT_DIG;
  98           else
  99             v.d *= 0x1p113L;
 100         }
 101       else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 102         {
               /* Same as the previous case with the roles of x and y
                  exchanged.  */
 103           v.ieee.exponent -= LDBL_MANT_DIG;
 104           if (u.ieee.exponent)
 105             u.ieee.exponent += LDBL_MANT_DIG;
 106           else
 107             u.d *= 0x1p113L;
 108         }
 109       else /* if (u.ieee.exponent + v.ieee.exponent
 110                   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
 111         {
               /* Small product: scale the factor with the larger exponent
                  field up by 2 * LDBL_MANT_DIG.  */
 112           if (u.ieee.exponent > v.ieee.exponent)
 113             u.ieee.exponent += 2 * LDBL_MANT_DIG;
 114           else
 115             v.ieee.exponent += 2 * LDBL_MANT_DIG;
               /* z is scaled by the same amount only when its exponent field
                  is at most 4 * LDBL_MANT_DIG + 4; the result is then scaled
                  back down at the end (adjust = -1).  A zero exponent field
                  (zero or subnormal z) is handled by multiplying.  */
 116           if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
 117             {
 118               if (w.ieee.exponent)
 119                 w.ieee.exponent += 2 * LDBL_MANT_DIG;
 120               else
 121                 w.d *= 0x1p226L;
 122               adjust = -1;
 123             }
 124           /* Otherwise x * y should just affect inexact
 125              and nothing else.  */
 126         }
           /* Continue below with the rescaled operands.  */
 127       x = u.d;
 128       y = v.d;
 129       z = w.d;
 130     }
 131   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
       /* C is 2^((LDBL_MANT_DIG + 1) / 2) + 1, the constant used to split
          x and y into high parts (x1, y1) and low parts (x2, y2).  m1 is
          the rounded product and m2 the correction term built from the
          partial products.
          NOTE(review): these operations, and the Knuth addition that
          follows, run in whatever rounding mode the caller has set and
          before feholdexcept is called, so any exception flags they raise
          are not discarded.  Confirm that this is intended for directed
          rounding modes and for FE_INEXACT reporting.  */
 132 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
 133   long double x1 = x * C;
 134   long double y1 = y * C;
 135   long double m1 = x * y;
 136   x1 = (x - x1) + x1;
 137   y1 = (y - y1) + y1;
 138   long double x2 = x - x1;
 139   long double y2 = y - y1;
 140   long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 141
 142   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
       /* a1 is the rounded sum; t1 and t2 recover the parts of m1 and z
          that did not make it into a1, and a2 is their sum.  */
 143   long double a1 = z + m1;
 144   long double t1 = a1 - z;
 145   long double t2 = a1 - t1;
 146   t1 = m1 - t1;
 147   t2 = z - t2;
 148   long double a2 = t1 + t2;
 149
       /* Save the caller's floating-point environment in env and start
          with cleared exception flags, so that FE_INEXACT tested below
          reflects only operations performed from here on.  Then switch to
          rounding toward zero.  */
 150   fenv_t env;
 151   feholdexcept (&env);
 152   fesetround (FE_TOWARDZERO);
 153   /* Perform m2 + a2 addition with round to odd.  */
 154   u.d = a2 + m2;
 155
 156   if (__builtin_expect (adjust == 0, 1))
 157     {
           /* Round to odd: if the truncated sum has an even last mantissa
              bit and is not Inf/NaN, set that bit when the addition was
              inexact.  */
 158       if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
 159         u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
           /* Reinstall the saved environment (including its rounding
              mode) and raise in it the exceptions raised since
              feholdexcept.  */
 160       feupdateenv (&env);
 161       /* Result is a1 + u.d.  */
 162       return a1 + u.d;
 163     }
 164   else if (__builtin_expect (adjust > 0, 1))
 165     {
           /* Same round-to-odd step as for adjust == 0.  */
 166       if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
 167         u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
 168       feupdateenv (&env);
 169       /* Result is a1 + u.d, scaled up.  */
 170       return (a1 + u.d) * 0x1p113L;
 171     }
 172   else
 173     {
           /* adjust < 0: the operands were scaled up by 0x1p226L, so the
              result must be scaled down and may become subnormal.
              Round to odd as above; unlike the other branches there is no
              Inf/NaN exponent check here (presumably u.d is always finite
              on this path -- TODO confirm).  */
 174       if ((u.ieee.mantissa3 & 1) == 0)
 175         u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
           /* v.d is a1 + u.d computed while still rounding toward zero.  */
 176       v.d = a1 + u.d;
 177       /* Ensure the addition is not scheduled after fetestexcept call.  */
 178       asm volatile ("" : : "m" (v));
           /* j is nonzero if FE_INEXACT has been raised since feholdexcept,
              by either the a2 + m2 or the a1 + u.d addition.  */
 179       int j = fetestexcept (FE_INEXACT) != 0;
 180       feupdateenv (&env);
 181       /* Ensure the following computations are performed in default rounding
 182          mode instead of just reusing the round to zero computation.  */
 183       asm volatile ("" : "=m" (u) : "m" (u));
 184       /* If a1 + u.d is exact, the only rounding happens during
 185          scaling down.  */
 186       if (j == 0)
 187         return v.d * 0x1p-226L;
 188       /* If result rounded to zero is not subnormal, no double
 189          rounding will occur.  */
 190       if (v.ieee.exponent > 226)
 191         return (a1 + u.d) * 0x1p-226L;
 192       /* If v.d * 0x1p-226L with round to zero is a subnormal above
 193          or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa
 194          down just by 1 bit, which means v.ieee.mantissa3 |= j would
 195          change the round bit, not sticky or guard bit.
 196          v.d * 0x1p-226L never normalizes by shifting up,
 197          so round bit plus sticky bit should be already enough
 198          for proper rounding.  */
 199       if (v.ieee.exponent == 226)
 200         {
 201           /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
 202              v.ieee.mantissa3 & 1 is the round bit and j is our sticky
 203              bit.  In round-to-nearest 001 rounds down like 00,
 204              011 rounds up, even though 01 rounds down (thus we need
 205              to adjust), 101 rounds down like 10 and 111 rounds up
 206              like 11.  */
               /* Only the 01 pattern needs fixing: scale down first, then
                  move one LDBL_DENORM_MIN away from zero, in the direction
                  given by the sign of the result.  */
 207           if ((v.ieee.mantissa3 & 3) == 1)
 208             {
 209               v.d *= 0x1p-226L;
 210               if (v.ieee.negative)
 211                 return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
 212               else
 213                 return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
 214             }
 215           else
 216             return v.d * 0x1p-226L;
 217         }
           /* Exponent field below 226: fold the sticky bit j into the
              lowest mantissa bit of the truncated sum, then let the
              scale-down multiplication do the rounding.  */
 218       v.ieee.mantissa3 |= j;
 219       return v.d * 0x1p-226L;
 220     }
 221 }
     /* weak_alias is defined elsewhere in the project; presumably this makes
        fmal a weak alias for __fmal -- confirm against its definition.  */
 222 weak_alias (__fmal, fmal)