contrib/mpfr/src/fms.c

   1 /* mpfr_fms -- Floating multiply-subtract
   2
   3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   4 Contributed by the AriC and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #include "mpfr-impl.h"
  24
  25 /* The fused-multiply-subtract (fms) of x, y and z is defined by:
  26    fms(x,y,z)= x*y - z
  27    Note: this is neither in IEEE754R, nor in LIA-2, but both the
  28    PowerPC and the Itanium define fms as x*y - z.
  29 */
  30
  31 int
  32 mpfr_fms (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
  33           mpfr_rnd_t rnd_mode)
  34 {
  35   int inexact;
  36   mpfr_t u;
  37   MPFR_SAVE_EXPO_DECL (expo);
  38   MPFR_GROUP_DECL(group);
  39
  40   MPFR_LOG_FUNC
  41     (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg  z[%Pu]=%.*Rg rnd=%d",
  42       mpfr_get_prec (x), mpfr_log_prec, x,
  43       mpfr_get_prec (y), mpfr_log_prec, y,
  44       mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
  45      ("s[%Pu]=%.*Rg inexact=%d",
  46       mpfr_get_prec (s), mpfr_log_prec, s, inexact));
  47
  48   /* particular cases */
  49   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
  50                      MPFR_IS_SINGULAR(y) ||
  51                      MPFR_IS_SINGULAR(z) ))
  52     {
  53       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
  54         {
  55           MPFR_SET_NAN(s);
  56           MPFR_RET_NAN;
  57         }
  58       /* now neither x, y or z is NaN */
  59       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
  60         {
  61           /* cases Inf*0-z, 0*Inf-z, Inf-Inf */
  62           if ((MPFR_IS_ZERO(y)) ||
  63               (MPFR_IS_ZERO(x)) ||
  64               (MPFR_IS_INF(z) &&
  65                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) == MPFR_SIGN(z))))
  66             {
  67               MPFR_SET_NAN(s);
  68               MPFR_RET_NAN;
  69             }
  70           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
  71             {
  72               MPFR_SET_INF(s);
  73               MPFR_SET_OPPOSITE_SIGN(s, z);
  74               MPFR_RET(0);
  75             }
  76           else /* z is finite */
  77             {
  78               MPFR_SET_INF(s);
  79               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
  80               MPFR_RET(0);
  81             }
  82         }
  83       /* now x and y are finite */
  84       else if (MPFR_IS_INF(z))
  85         {
  86           MPFR_SET_INF(s);
  87           MPFR_SET_OPPOSITE_SIGN(s, z);
  88           MPFR_RET(0);
  89         }
  90       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
  91         {
  92           if (MPFR_IS_ZERO(z))
  93             {
  94               int sign_p;
  95               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
  96               MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
  97                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_POS(z))
  98                                 ? -1 : 1) :
  99                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_NEG(z))
 100                                 ? 1 : -1)));
 101               MPFR_SET_ZERO(s);
 102               MPFR_RET(0);
 103             }
 104           else
 105             return mpfr_neg (s, z, rnd_mode);
 106         }
 107       else /* necessarily z is zero here */
 108         {
 109           MPFR_ASSERTD(MPFR_IS_ZERO(z));
 110           return mpfr_mul (s, x, y, rnd_mode);
 111         }
 112     }
 113
 114   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
 115      is exact, except in case of overflow or underflow. */
 116   MPFR_SAVE_EXPO_MARK (expo);
 117   MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
 118
 119   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
 120     {
 121       /* overflow or underflow - this case is regarded as rare, thus
 122          does not need to be very efficient (even if some tests below
 123          could have been done earlier).
 124          It is an overflow iff u is an infinity (since MPFR_RNDN was used).
 125          Alternatively, we could test the overflow flag, but in this case,
 126          mpfr_clear_flags would have been necessary. */
 127       if (MPFR_IS_INF (u))  /* overflow */
 128         {
 129           /* Let's eliminate the obvious case where x*y and z have the
 130              same sign. No possible cancellation -> real overflow.
 131              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
 132              then |x*y| >= 2^(emax+1), and |x*y - z| >= 2^emax. This case
 133              is also an overflow. */
 134           if (MPFR_SIGN (u) != MPFR_SIGN (z) ||
 135               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
 136             {
 137               MPFR_GROUP_CLEAR (group);
 138               MPFR_SAVE_EXPO_FREE (expo);
 139               return mpfr_overflow (s, rnd_mode, - MPFR_SIGN (z));
 140             }
 141
 142           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
 143              (x/4)*y does not overflow (let's recall that the result
 144              is exact with an unbounded exponent range). It does not
 145              underflow either, because x*y overflows and the exponent
 146              range is large enough. */
 147           inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
 148           MPFR_ASSERTN (inexact == 0);
 149           inexact = mpfr_mul (u, u, y, MPFR_RNDN);
 150           MPFR_ASSERTN (inexact == 0);
 151
 152           /* Now, we need to subtract z/4... But it may underflow! */
 153           {
 154             mpfr_t zo4;
 155             mpfr_srcptr zz;
 156             MPFR_BLOCK_DECL (flags);
 157
 158             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
 159                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
 160               {
 161                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
 162                 zz = z;
 163               }
 164             else
 165               {
 166                 mpfr_init2 (zo4, MPFR_PREC (z));
 167                 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
 168                   {
 169                     /* The division by 4 underflowed! */
 170                     MPFR_ASSERTN (0); /* TODO... */
 171                   }
 172                 zz = zo4;
 173               }
 174
 175             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
 176                following subtraction would give the same result). */
 177             MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, zz, rnd_mode));
 178             /* u and zz have the same sign, so that an overflow
 179                is not possible. But an underflow is theoretically
 180                possible! */
 181             if (MPFR_UNDERFLOW (flags))
 182               {
 183                 MPFR_ASSERTN (zz != z);
 184                 MPFR_ASSERTN (0); /* TODO... */
 185                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
 186               }
 187             else
 188               {
 189                 int inex2;
 190
 191                 if (zz != z)
 192                   mpfr_clear (zo4);
 193                 MPFR_GROUP_CLEAR (group);
 194                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 195                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
 196                 if (inex2)  /* overflow */
 197                   {
 198                     inexact = inex2;
 199                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 200                   }
 201                 goto end;
 202               }
 203           }
 204         }
 205       else  /* underflow: one has |xy| < 2^(emin-1). */
 206         {
 207           unsigned long scale = 0;
 208           mpfr_t scaled_z;
 209           mpfr_srcptr new_z;
 210           mpfr_exp_t diffexp;
 211           mpfr_prec_t pzs;
 212           int xy_underflows;
 213
 214           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
 215              (the + 1 on MPFR_PREC (s) is necessary because the exponent
 216              of the result can be EXP(z) - 1). */
 217           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
 218           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
 219           if (diffexp <= pzs)
 220             {
 221               mpfr_uexp_t uscale;
 222               mpfr_t scaled_v;
 223               MPFR_BLOCK_DECL (flags);
 224
 225               uscale = (mpfr_uexp_t) pzs - diffexp + 1;
 226               MPFR_ASSERTN (uscale > 0);
 227               MPFR_ASSERTN (uscale <= ULONG_MAX);
 228               scale = uscale;
 229               mpfr_init2 (scaled_z, MPFR_PREC (z));
 230               inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
 231               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
 232               new_z = scaled_z;
 233               /* Now we need to recompute u = xy * 2^scale. */
 234               MPFR_BLOCK (flags,
 235                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
 236                             {
 237                               mpfr_init2 (scaled_v, MPFR_PREC (x));
 238                               mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
 239                               mpfr_mul (u, scaled_v, y, MPFR_RNDN);
 240                             }
 241                           else
 242                             {
 243                               mpfr_init2 (scaled_v, MPFR_PREC (y));
 244                               mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
 245                               mpfr_mul (u, x, scaled_v, MPFR_RNDN);
 246                             });
 247               mpfr_clear (scaled_v);
 248               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 249               xy_underflows = MPFR_UNDERFLOW (flags);
 250             }
 251           else
 252             {
 253               new_z = z;
 254               xy_underflows = 1;
 255             }
 256
 257           if (xy_underflows)
 258             {
 259               /* Let's replace xy by sign(xy) * 2^(emin-1). */
 260               MPFR_PREC (u) = MPFR_PREC_MIN;
 261               mpfr_setmin (u, __gmpfr_emin);
 262               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
 263                                                 MPFR_SIGN (y)));
 264             }
 265
 266           {
 267             MPFR_BLOCK_DECL (flags);
 268
 269             MPFR_BLOCK (flags, inexact = mpfr_sub (s, u, new_z, rnd_mode));
 270             MPFR_GROUP_CLEAR (group);
 271             if (scale != 0)
 272               {
 273                 int inex2;
 274
 275                 mpfr_clear (scaled_z);
 276                 /* Here an overflow is theoretically possible, in which case
 277                    the result may be wrong, hence the assert. An underflow
 278                    is not possible, but let's check that anyway. */
 279                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
 280                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
 281                 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
 282                 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
 283                    Also, handle the double rounding case:
 284                    s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
 285                 if (inex2)  /* underflow */
 286                   inexact = inex2;
 287               }
 288           }
 289
 290           /* FIXME/TODO: I'm not sure that the following is correct.
 291              Check for possible spurious exceptions due to intermediate
 292              computations. */
 293           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 294           goto end;
 295         }
 296     }
 297
 298   inexact = mpfr_sub (s, u, z, rnd_mode);
 299   MPFR_GROUP_CLEAR (group);
 300   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 301  end:
 302   MPFR_SAVE_EXPO_FREE (expo);
 303   return mpfr_check_range (s, inexact, rnd_mode);
 304 }