1 /* mpfr_fma -- Floating multiply-add
3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute
9 it and/or modify it under the terms of the GNU Lesser
10 General Public License as published by the Free Software
11 Foundation; either version 2.1 of the License, or (at your
12 option) any later version.
14 The GNU MPFR Library is distributed in the hope that it will
15 be useful, but WITHOUT ANY WARRANTY; without even the
16 implied warranty of MERCHANTABILITY or FITNESS FOR A
17 PARTICULAR PURPOSE. See the GNU Lesser General Public
18 License for more details.
20 You should have received a copy of the GNU Lesser
21 General Public License along with the GNU MPFR Library; see
22 the file COPYING.LIB. If not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
24 MA 02110-1301, USA. */
26 #include "mpfr-impl.h"
28 /* The fused-multiply-add (fma) of x, y and z is defined by:
33 mpfr_fma (mpfr_ptr s
, mpfr_srcptr x
, mpfr_srcptr y
, mpfr_srcptr z
,
38 MPFR_SAVE_EXPO_DECL (expo
);
40 /* particular cases */
41 if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x
) ||
42 MPFR_IS_SINGULAR(y
) ||
43 MPFR_IS_SINGULAR(z
) ))
45 if (MPFR_IS_NAN(x
) || MPFR_IS_NAN(y
) || MPFR_IS_NAN(z
))
50 /* now neither x, y or z is NaN */
51 else if (MPFR_IS_INF(x
) || MPFR_IS_INF(y
))
53 /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
54 if ((MPFR_IS_ZERO(y
)) ||
57 ((MPFR_MULT_SIGN(MPFR_SIGN(x
), MPFR_SIGN(y
))) != MPFR_SIGN(z
))))
62 else if (MPFR_IS_INF(z
)) /* case Inf-Inf already checked above */
65 MPFR_SET_SAME_SIGN(s
, z
);
68 else /* z is finite */
71 MPFR_SET_SIGN(s
, MPFR_MULT_SIGN(MPFR_SIGN(x
) , MPFR_SIGN(y
)));
75 /* now x and y are finite */
76 else if (MPFR_IS_INF(z
))
79 MPFR_SET_SAME_SIGN(s
, z
);
82 else if (MPFR_IS_ZERO(x
) || MPFR_IS_ZERO(y
))
87 sign_p
= MPFR_MULT_SIGN( MPFR_SIGN(x
) , MPFR_SIGN(y
) );
88 MPFR_SET_SIGN(s
,(rnd_mode
!= GMP_RNDD
?
89 ((MPFR_IS_NEG_SIGN(sign_p
) && MPFR_IS_NEG(z
))
91 ((MPFR_IS_POS_SIGN(sign_p
) && MPFR_IS_POS(z
))
97 return mpfr_set (s
, z
, rnd_mode
);
99 else /* necessarily z is zero here */
101 MPFR_ASSERTD(MPFR_IS_ZERO(z
));
102 return mpfr_mul (s
, x
, y
, rnd_mode
);
106 /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
107 is exact, except in case of overflow or underflow. */
108 MPFR_SAVE_EXPO_MARK (expo
);
109 mpfr_init2 (u
, MPFR_PREC(x
) + MPFR_PREC(y
));
111 if (MPFR_UNLIKELY (mpfr_mul (u
, x
, y
, GMP_RNDN
)))
113 /* overflow or underflow - this case is regarded as rare, thus
114 does not need to be very efficient (even if some tests below
115 could have been done earlier).
116 It is an overflow iff u is an infinity (since GMP_RNDN was used).
117 Alternatively, we could test the overflow flag, but in this case,
118 mpfr_clear_flags would have been necessary. */
119 if (MPFR_IS_INF (u
)) /* overflow */
121 /* Let's eliminate the obvious case where x*y and z have the
122 same sign. No possible cancellation -> real overflow.
123 Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
124 then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
125 is also an overflow. */
126 if (MPFR_SIGN (u
) == MPFR_SIGN (z
) ||
127 MPFR_GET_EXP (x
) + MPFR_GET_EXP (y
) >= __gmpfr_emax
+ 3)
130 MPFR_SAVE_EXPO_FREE (expo
);
131 return mpfr_overflow (s
, rnd_mode
, MPFR_SIGN (z
));
134 /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
135 (x/4)*y does not overflow (let's recall that the result
136 is exact with an unbounded exponent range). It does not
137 underflow either, because x*y overflows and the exponent
138 range is large enough. */
139 inexact
= mpfr_div_2ui (u
, x
, 2, GMP_RNDN
);
140 MPFR_ASSERTN (inexact
== 0);
141 inexact
= mpfr_mul (u
, u
, y
, GMP_RNDN
);
142 MPFR_ASSERTN (inexact
== 0);
144 /* Now, we need to add z/4... But it may underflow! */
148 MPFR_BLOCK_DECL (flags
);
150 if (MPFR_GET_EXP (u
) > MPFR_GET_EXP (z
) &&
151 MPFR_GET_EXP (u
) - MPFR_GET_EXP (z
) > MPFR_PREC (u
))
153 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
158 mpfr_init2 (zo4
, MPFR_PREC (z
));
159 if (mpfr_div_2ui (zo4
, z
, 2, GMP_RNDZ
))
161 /* The division by 4 underflowed! */
162 MPFR_ASSERTN (0); /* TODO... */
167 /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
168 following addition would give the same result). */
169 MPFR_BLOCK (flags
, inexact
= mpfr_add (s
, u
, zz
, rnd_mode
));
170 /* u and zz have different signs, so that an overflow
171 is not possible. But an underflow is theoretically
173 if (MPFR_UNDERFLOW (flags
))
175 MPFR_ASSERTN (zz
!= z
);
176 MPFR_ASSERTN (0); /* TODO... */
177 mpfr_clears (zo4
, u
, (mpfr_ptr
) 0);
186 MPFR_ASSERTN (! MPFR_OVERFLOW (flags
));
187 inex2
= mpfr_mul_2ui (s
, s
, 2, rnd_mode
);
188 if (inex2
) /* overflow */
191 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, __gmpfr_flags
);
197 else /* underflow: one has |xy| < 2^(emin-1). */
199 unsigned long scale
= 0;
206 /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
207 (the + 1 on MPFR_PREC (s) is necessary because the exponent
208 of the result can be EXP(z) - 1). */
209 diffexp
= MPFR_GET_EXP (z
) - __gmpfr_emin
;
210 pzs
= MAX (MPFR_PREC (z
), MPFR_PREC (s
) + 1);
213 mp_exp_unsigned_t uscale
;
215 MPFR_BLOCK_DECL (flags
);
217 uscale
= (mp_exp_unsigned_t
) pzs
- diffexp
+ 1;
218 MPFR_ASSERTN (uscale
> 0);
219 MPFR_ASSERTN (uscale
<= ULONG_MAX
);
221 mpfr_init2 (scaled_z
, MPFR_PREC (z
));
222 inexact
= mpfr_mul_2ui (scaled_z
, z
, scale
, GMP_RNDN
);
223 MPFR_ASSERTN (inexact
== 0); /* TODO: overflow case */
225 /* Now we need to recompute u = xy * 2^scale. */
227 if (MPFR_GET_EXP (x
) < MPFR_GET_EXP (y
))
229 mpfr_init2 (scaled_v
, MPFR_PREC (x
));
230 mpfr_mul_2ui (scaled_v
, x
, scale
, GMP_RNDN
);
231 mpfr_mul (u
, scaled_v
, y
, GMP_RNDN
);
235 mpfr_init2 (scaled_v
, MPFR_PREC (y
));
236 mpfr_mul_2ui (scaled_v
, y
, scale
, GMP_RNDN
);
237 mpfr_mul (u
, x
, scaled_v
, GMP_RNDN
);
239 mpfr_clear (scaled_v
);
240 MPFR_ASSERTN (! MPFR_OVERFLOW (flags
));
241 xy_underflows
= MPFR_UNDERFLOW (flags
);
251 /* Let's replace xy by sign(xy) * 2^(emin-1). */
252 mpfr_set_prec (u
, MPFR_PREC_MIN
);
253 mpfr_setmin (u
, __gmpfr_emin
);
254 MPFR_SET_SIGN (u
, MPFR_MULT_SIGN (MPFR_SIGN (x
),
259 MPFR_BLOCK_DECL (flags
);
261 MPFR_BLOCK (flags
, inexact
= mpfr_add (s
, u
, new_z
, rnd_mode
));
268 mpfr_clear (scaled_z
);
269 /* Here an overflow is theoretically possible, in which case
270 the result may be wrong, hence the assert. An underflow
271 is not possible, but let's check that anyway. */
272 MPFR_ASSERTN (! MPFR_OVERFLOW (flags
)); /* TODO... */
273 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags
)); /* not possible */
274 inex2
= mpfr_div_2ui (s
, s
, scale
, GMP_RNDN
);
275 /* FIXME: this seems incorrect. GMP_RNDN -> rnd_mode?
276 Also, handle the double rounding case:
277 s / 2^scale = 2^(emin - 2) in GMP_RNDN. */
278 if (inex2
) /* underflow */
283 /* FIXME/TODO: I'm not sure that the following is correct.
284 Check for possible spurious exceptions due to intermediate
286 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, __gmpfr_flags
);
291 inexact
= mpfr_add (s
, u
, z
, rnd_mode
);
293 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, __gmpfr_flags
);
295 MPFR_SAVE_EXPO_FREE (expo
);
296 return mpfr_check_range (s
, inexact
, rnd_mode
);