contrib/mpfr/fma.c

   1 /* mpfr_fma -- Floating multiply-add
   2
   3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Cacao projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute
   9 it and/or modify it under the terms of the GNU Lesser
  10 General Public License as published by the Free Software
  11 Foundation; either version 2.1 of the License, or (at your
  12 option) any later version.
  13
  14 The GNU MPFR Library is distributed in the hope that it will
  15 be useful, but WITHOUT ANY WARRANTY; without even the
  16 implied warranty of MERCHANTABILITY or FITNESS FOR A
  17 PARTICULAR PURPOSE.  See the GNU Lesser General Public
  18 License for more details.
  19
  20 You should have received a copy of the GNU Lesser
  21 General Public License along with the GNU MPFR Library; see
  22 the file COPYING.LIB.  If not, write to the Free Software
  23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  24 MA 02110-1301, USA. */
  25
  26 #include "mpfr-impl.h"
  27
  28 /* The fused-multiply-add (fma) of x, y and z is defined by:
  29    fma(x,y,z)= x*y + z
  30 */
  31
  32 int
  33 mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
  34           mp_rnd_t rnd_mode)
  35 {
  36   int inexact;
  37   mpfr_t u;
  38   MPFR_SAVE_EXPO_DECL (expo);
  39
  40   /* particular cases */
  41   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
  42                      MPFR_IS_SINGULAR(y) ||
  43                      MPFR_IS_SINGULAR(z) ))
  44     {
  45       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
  46         {
  47           MPFR_SET_NAN(s);
  48           MPFR_RET_NAN;
  49         }
  50       /* now neither x, y or z is NaN */
  51       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
  52         {
  53           /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
  54           if ((MPFR_IS_ZERO(y)) ||
  55               (MPFR_IS_ZERO(x)) ||
  56               (MPFR_IS_INF(z) &&
  57                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
  58             {
  59               MPFR_SET_NAN(s);
  60               MPFR_RET_NAN;
  61             }
  62           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
  63             {
  64               MPFR_SET_INF(s);
  65               MPFR_SET_SAME_SIGN(s, z);
  66               MPFR_RET(0);
  67             }
  68           else /* z is finite */
  69             {
  70               MPFR_SET_INF(s);
  71               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
  72               MPFR_RET(0);
  73             }
  74         }
  75       /* now x and y are finite */
  76       else if (MPFR_IS_INF(z))
  77         {
  78           MPFR_SET_INF(s);
  79           MPFR_SET_SAME_SIGN(s, z);
  80           MPFR_RET(0);
  81         }
  82       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
  83         {
  84           if (MPFR_IS_ZERO(z))
  85             {
  86               int sign_p;
  87               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
  88               MPFR_SET_SIGN(s,(rnd_mode != GMP_RNDD ?
  89                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
  90                                 ? -1 : 1) :
  91                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
  92                                 ? 1 : -1)));
  93               MPFR_SET_ZERO(s);
  94               MPFR_RET(0);
  95             }
  96           else
  97             return mpfr_set (s, z, rnd_mode);
  98         }
  99       else /* necessarily z is zero here */
 100         {
 101           MPFR_ASSERTD(MPFR_IS_ZERO(z));
 102           return mpfr_mul (s, x, y, rnd_mode);
 103         }
 104     }
 105
 106   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
 107      is exact, except in case of overflow or underflow. */
 108   MPFR_SAVE_EXPO_MARK (expo);
 109   mpfr_init2 (u, MPFR_PREC(x) + MPFR_PREC(y));
 110
 111   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, GMP_RNDN)))
 112     {
 113       /* overflow or underflow - this case is regarded as rare, thus
 114          does not need to be very efficient (even if some tests below
 115          could have been done earlier).
 116          It is an overflow iff u is an infinity (since GMP_RNDN was used).
 117          Alternatively, we could test the overflow flag, but in this case,
 118          mpfr_clear_flags would have been necessary. */
 119       if (MPFR_IS_INF (u))  /* overflow */
 120         {
 121           /* Let's eliminate the obvious case where x*y and z have the
 122              same sign. No possible cancellation -> real overflow.
 123              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
 124              then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
 125              is also an overflow. */
 126           if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
 127               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
 128             {
 129               mpfr_clear (u);
 130               MPFR_SAVE_EXPO_FREE (expo);
 131               return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
 132             }
 133
 134           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
 135              (x/4)*y does not overflow (let's recall that the result
 136              is exact with an unbounded exponent range). It does not
 137              underflow either, because x*y overflows and the exponent
 138              range is large enough. */
 139           inexact = mpfr_div_2ui (u, x, 2, GMP_RNDN);
 140           MPFR_ASSERTN (inexact == 0);
 141           inexact = mpfr_mul (u, u, y, GMP_RNDN);
 142           MPFR_ASSERTN (inexact == 0);
 143
 144           /* Now, we need to add z/4... But it may underflow! */
 145           {
 146             mpfr_t zo4;
 147             mpfr_srcptr zz;
 148             MPFR_BLOCK_DECL (flags);
 149
 150             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
 151                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
 152               {
 153                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
 154                 zz = z;
 155               }
 156             else
 157               {
 158                 mpfr_init2 (zo4, MPFR_PREC (z));
 159                 if (mpfr_div_2ui (zo4, z, 2, GMP_RNDZ))
 160                   {
 161                     /* The division by 4 underflowed! */
 162                     MPFR_ASSERTN (0); /* TODO... */
 163                   }
 164                 zz = zo4;
 165               }
 166
 167             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
 168                following addition would give the same result). */
 169             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
 170             /* u and zz have different signs, so that an overflow
 171                is not possible. But an underflow is theoretically
 172                possible! */
 173             if (MPFR_UNDERFLOW (flags))
 174               {
 175                 MPFR_ASSERTN (zz != z);
 176                 MPFR_ASSERTN (0); /* TODO... */
 177                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
 178               }
 179             else
 180               {
 181                 int inex2;
 182
 183                 if (zz != z)
 184                   mpfr_clear (zo4);
 185                 mpfr_clear (u);
 186                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 187                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
 188                 if (inex2)  /* overflow */
 189                   {
 190                     inexact = inex2;
 191                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 192                   }
 193                 goto end;
 194               }
 195           }
 196         }
 197       else  /* underflow: one has |xy| < 2^(emin-1). */
 198         {
 199           unsigned long scale = 0;
 200           mpfr_t scaled_z;
 201           mpfr_srcptr new_z;
 202           mp_exp_t diffexp;
 203           mp_prec_t pzs;
 204           int xy_underflows;
 205
 206           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
 207              (the + 1 on MPFR_PREC (s) is necessary because the exponent
 208              of the result can be EXP(z) - 1). */
 209           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
 210           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
 211           if (diffexp <= pzs)
 212             {
 213               mp_exp_unsigned_t uscale;
 214               mpfr_t scaled_v;
 215               MPFR_BLOCK_DECL (flags);
 216
 217               uscale = (mp_exp_unsigned_t) pzs - diffexp + 1;
 218               MPFR_ASSERTN (uscale > 0);
 219               MPFR_ASSERTN (uscale <= ULONG_MAX);
 220               scale = uscale;
 221               mpfr_init2 (scaled_z, MPFR_PREC (z));
 222               inexact = mpfr_mul_2ui (scaled_z, z, scale, GMP_RNDN);
 223               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
 224               new_z = scaled_z;
 225               /* Now we need to recompute u = xy * 2^scale. */
 226               MPFR_BLOCK (flags,
 227                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
 228                             {
 229                               mpfr_init2 (scaled_v, MPFR_PREC (x));
 230                               mpfr_mul_2ui (scaled_v, x, scale, GMP_RNDN);
 231                               mpfr_mul (u, scaled_v, y, GMP_RNDN);
 232                             }
 233                           else
 234                             {
 235                               mpfr_init2 (scaled_v, MPFR_PREC (y));
 236                               mpfr_mul_2ui (scaled_v, y, scale, GMP_RNDN);
 237                               mpfr_mul (u, x, scaled_v, GMP_RNDN);
 238                             });
 239               mpfr_clear (scaled_v);
 240               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 241               xy_underflows = MPFR_UNDERFLOW (flags);
 242             }
 243           else
 244             {
 245               new_z = z;
 246               xy_underflows = 1;
 247             }
 248
 249           if (xy_underflows)
 250             {
 251               /* Let's replace xy by sign(xy) * 2^(emin-1). */
 252               mpfr_set_prec (u, MPFR_PREC_MIN);
 253               mpfr_setmin (u, __gmpfr_emin);
 254               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
 255                                                 MPFR_SIGN (y)));
 256             }
 257
 258           {
 259             MPFR_BLOCK_DECL (flags);
 260
 261             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
 262             mpfr_clear (u);
 263
 264             if (scale != 0)
 265               {
 266                 int inex2;
 267
 268                 mpfr_clear (scaled_z);
 269                 /* Here an overflow is theoretically possible, in which case
 270                    the result may be wrong, hence the assert. An underflow
 271                    is not possible, but let's check that anyway. */
 272                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
 273                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
 274                 inex2 = mpfr_div_2ui (s, s, scale, GMP_RNDN);
 275                 /* FIXME: this seems incorrect. GMP_RNDN -> rnd_mode?
 276                    Also, handle the double rounding case:
 277                    s / 2^scale = 2^(emin - 2) in GMP_RNDN. */
 278                 if (inex2)  /* underflow */
 279                   inexact = inex2;
 280               }
 281           }
 282
 283           /* FIXME/TODO: I'm not sure that the following is correct.
 284              Check for possible spurious exceptions due to intermediate
 285              computations. */
 286           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 287           goto end;
 288         }
 289     }
 290
 291   inexact = mpfr_add (s, u, z, rnd_mode);
 292   mpfr_clear (u);
 293   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 294  end:
 295   MPFR_SAVE_EXPO_FREE (expo);
 296   return mpfr_check_range (s, inexact, rnd_mode);
 297 }