/* mpfr_fma -- Floating multiply-add

Copyright 2001-2002, 2004, 2006-2015 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#include "mpfr-impl.h"

/* The fused-multiply-add (fma) of x, y and z is defined by:
   fma(x,y,z) = x*y + z
   computed as if in infinite precision, then rounded once in the
   direction rnd_mode. */
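
/* Special cases follow the usual IEEE 754 fma rules, as implemented
   below: any NaN operand, as well as Inf*0, 0*Inf and Inf-Inf, gives
   NaN; an infinite result takes the sign of the dominant term; and
   when x*y and z are both zero, the sign of the zero result depends
   on the operand signs and on the rounding mode. */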

int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;
  mpfr_t u;
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL(group);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
                     MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    {
      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
        {
          MPFR_SET_NAN(s);
          MPFR_RET_NAN;
        }
      /* now neither x, y nor z is NaN */
      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
        {
          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
          if ((MPFR_IS_ZERO(y)) ||
              (MPFR_IS_ZERO(x)) ||
              (MPFR_IS_INF(z) &&
               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
            {
              MPFR_SET_NAN(s);
              MPFR_RET_NAN;
            }
          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
            {
              MPFR_SET_INF(s);
              MPFR_SET_SAME_SIGN(s, z);
              MPFR_RET(0);
            }
          else /* z is finite */
            {
              MPFR_SET_INF(s);
              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
              MPFR_RET(0);
            }
        }
      /* now x and y are finite */
      else if (MPFR_IS_INF(z))
        {
          MPFR_SET_INF(s);
          MPFR_SET_SAME_SIGN(s, z);
          MPFR_RET(0);
        }
      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
        {
          if (MPFR_IS_ZERO(z))
            {
              int sign_p;
              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
                                ? -1 : 1) :
                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
                                ? 1 : -1)));
              MPFR_SET_ZERO(s);
              MPFR_RET(0);
            }
          else
            return mpfr_set (s, z, rnd_mode);
        }
      else /* necessarily z is zero here */
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(z));
          return mpfr_mul (s, x, y, rnd_mode);
        }
    }

  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
  MPFR_SAVE_EXPO_MARK (expo);
  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
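
  /* For instance, with two 53-bit operands u gets 53 + 53 = 106 bits,
     which is always enough to represent their product exactly as long
     as the exponent stays within range. */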

  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */
      if (MPFR_IS_INF (u)) /* overflow */
        {
          MPFR_LOG_MSG (("Overflow on x*y\n", 0));

          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
             is also an overflow. */
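          /* (Spelling the second bound out: each significand is at
             least 1/2 in magnitude, so |x*y| >= 2^(E(x)+E(y)-2), which
             is >= 2^(emax+1) when E(x)+E(y) >= emax+3; then
             |x*y + z| > 2^(emax+1) - 2^emax = 2^emax.) */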
          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
            {
              MPFR_GROUP_CLEAR (group);
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
            }

          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
             (x/4)*y does not overflow (let's recall that the result
             is exact with an unbounded exponent range). It does not
             underflow either, because x*y overflows and the exponent
             range is large enough. */
          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
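
          /* At this point u = x*y/4 exactly. The strategy is to form
             (x*y + z)/4, which cannot overflow since u and z/4 have
             opposite signs here, and to scale the sum back by 4 at the
             end (the mpfr_mul_2ui below), so that a genuine overflow
             is detected only on that final operation. */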

          /* Now, we need to add z/4... But it may underflow! */
          {
            mpfr_t zo4;
            mpfr_srcptr zz;
            MPFR_BLOCK_DECL (flags);

            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
              {
                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
                zz = z;
              }
            else
              {
                mpfr_init2 (zo4, MPFR_PREC (z));
                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
                  {
                    /* The division by 4 underflowed! */
                    MPFR_ASSERTN (0); /* TODO... */
                  }
                zz = zo4;
              }

            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
               following addition would give the same result). */
            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
            /* u and zz have different signs, so that an overflow
               is not possible. But an underflow is theoretically
               possible! */
            if (MPFR_UNDERFLOW (flags))
              {
                MPFR_ASSERTN (zz != z);
                MPFR_ASSERTN (0); /* TODO... */
                mpfr_clears (zo4, u, (mpfr_ptr) 0);
              }
            else
              {
                int inex2;

                if (zz != z)
                  mpfr_clear (zo4);
                MPFR_GROUP_CLEAR (group);
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
                if (inex2) /* overflow */
                  {
                    inexact = inex2;
                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
                  }
                goto end;
              }
          }
        }
      else /* underflow: one has |xy| < 2^(emin-1). */
        {
          unsigned long scale = 0;
          mpfr_t scaled_z;
          mpfr_srcptr new_z;
          mpfr_exp_t diffexp;
          mpfr_prec_t pzs;
          int xy_underflows;

          MPFR_LOG_MSG (("Underflow on x*y\n", 0));

          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
             (the + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1). */
          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
          MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d pzs=%Pd\n",
                         diffexp, pzs));
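          /* A worked instance with hypothetical values: PREC(z) =
             PREC(s) = 53 and EXP(z) = emin + 10 give pzs = 54 and
             diffexp = 10 <= pzs, hence scale = 54 - 10 + 1 = 45 below;
             scaled_z then has exponent emin + 55, so that
             ulp(scaled_z) = 2^(emin+2) > 2^emin as required. */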
          if (diffexp <= pzs)
            {
              mpfr_uexp_t uscale;
              mpfr_t scaled_v;
              MPFR_BLOCK_DECL (flags);

              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
              MPFR_ASSERTN (uscale > 0);
              MPFR_ASSERTN (uscale <= ULONG_MAX);
              scale = uscale;
              mpfr_init2 (scaled_z, MPFR_PREC (z));
              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
              MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
              new_z = scaled_z;
              /* Now we need to recompute u = xy * 2^scale. */
              MPFR_BLOCK (flags,
                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (x));
                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
                            }
                          else
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (y));
                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
                            });
              mpfr_clear (scaled_v);
              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
              xy_underflows = MPFR_UNDERFLOW (flags);
            }
          else
            {
              new_z = z;
              xy_underflows = 1;
            }
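
          /* In this branch EXP(z) - emin > pzs >= PREC(z), so
             ulp(z) > 2^emin already holds without any scaling; and
             since |x*y| < 2^(emin-1) < ulp(z), the product can be
             treated directly as an underflowing quantity. */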

          MPFR_LOG_MSG (("scale=%lu xy_underflows=%d\n",
                         scale, xy_underflows));

          if (xy_underflows)
            {
              /* Let's replace xy by sign(xy) * 2^(emin-1). */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
            }
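
          /* Informal justification for the surrogate: the exact product
             is nonzero with magnitude below 2^(emin-1), while the term
             it is added to has ulp > 2^emin, i.e. more than twice as
             large; hence only the sign of the product and the fact that
             it is nonzero can affect the rounded sum, and the minimum
             value 2^(emin-1) with the correct sign behaves the same. */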

          {
            MPFR_BLOCK_DECL (flags);

            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
            MPFR_LOG_MSG (("inexact=%d\n", inexact));
            MPFR_GROUP_CLEAR (group);
            if (scale != 0)
              {
                int inex2;

                mpfr_clear (scaled_z);
                /* Here an overflow is theoretically possible, in which case
                   the result may be wrong, hence the assert. An underflow
                   is not possible, but let's check that anyway. */
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
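                /* If the first rounding (in mpfr_add above) landed
                   exactly on the power of two 2^(emin-1+scale), the
                   division by 2^scale below would round a second time
                   at the underflow threshold. For MPFR_RNDN the fix
                   below replaces that second rounding by a directed
                   one consistent with the direction of the first,
                   avoiding a double rounding error. */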
                if (rnd_mode == MPFR_RNDN &&
                    MPFR_GET_EXP (s) == __gmpfr_emin - 1 + scale &&
                    mpfr_powerof2_raw (s))
                  {
                    MPFR_LOG_MSG (("Double rounding\n", 0));
                    rnd_mode = (MPFR_IS_NEG (s) ? inexact <= 0 : inexact >= 0)
                      ? MPFR_RNDZ : MPFR_RNDA;
                  }
                inex2 = mpfr_div_2ui (s, s, scale, rnd_mode);
                MPFR_LOG_MSG (("inex2=%d\n", inex2));
                if (inex2) /* underflow */
                  inexact = inex2;
              }
          }

          /* FIXME/TODO: I'm not sure that the following is correct.
             Check for possible spurious exceptions due to intermediate
             computations. */
          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
          goto end;
        }
    }

  inexact = mpfr_add (s, u, z, rnd_mode);
  MPFR_GROUP_CLEAR (group);
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 end:
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (s, inexact, rnd_mode);
}
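
/* A minimal usage sketch of mpfr_fma (illustrative only, hence kept
   under "#if 0"; it assumes the public <mpfr.h> interface and a
   standalone build against the library): */
#if 0
#include <stdio.h>
#include <mpfr.h>

int
main (void)
{
  mpfr_t s, x, y, z;
  int inexact;

  mpfr_inits2 (53, s, x, y, z, (mpfr_ptr) 0);
  mpfr_set_d (x, 1.5,  MPFR_RNDN);
  mpfr_set_d (y, 2.0,  MPFR_RNDN);
  mpfr_set_d (z, 0.25, MPFR_RNDN);

  /* s = 1.5*2.0 + 0.25 = 3.25, with a single rounding; the returned
     ternary value is 0 here since 3.25 is exactly representable. */
  inexact = mpfr_fma (s, x, y, z, MPFR_RNDN);
  mpfr_printf ("s = %Rg, inexact = %d\n", s, inexact);

  mpfr_clears (s, x, y, z, (mpfr_ptr) 0);
  return 0;
}
#endif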