1 /* mpfr_fma -- Floating multiply-add
3 Copyright 2001-2002, 2004, 2006-2015 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #include "mpfr-impl.h"
25 /* The fused-multiply-add (fma) of x, y and z is defined by:
30 mpfr_fma (mpfr_ptr s
, mpfr_srcptr x
, mpfr_srcptr y
, mpfr_srcptr z
,
35 MPFR_SAVE_EXPO_DECL (expo
);
36 MPFR_GROUP_DECL(group
);
39 (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
40 mpfr_get_prec (x
), mpfr_log_prec
, x
,
41 mpfr_get_prec (y
), mpfr_log_prec
, y
,
42 mpfr_get_prec (z
), mpfr_log_prec
, z
, rnd_mode
),
43 ("s[%Pu]=%.*Rg inexact=%d",
44 mpfr_get_prec (s
), mpfr_log_prec
, s
, inexact
));
46 /* particular cases */
47 if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x
) ||
48 MPFR_IS_SINGULAR(y
) ||
49 MPFR_IS_SINGULAR(z
) ))
51 if (MPFR_IS_NAN(x
) || MPFR_IS_NAN(y
) || MPFR_IS_NAN(z
))
56 /* now neither x, y or z is NaN */
57 else if (MPFR_IS_INF(x
) || MPFR_IS_INF(y
))
59 /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
60 if ((MPFR_IS_ZERO(y
)) ||
63 ((MPFR_MULT_SIGN(MPFR_SIGN(x
), MPFR_SIGN(y
))) != MPFR_SIGN(z
))))
68 else if (MPFR_IS_INF(z
)) /* case Inf-Inf already checked above */
71 MPFR_SET_SAME_SIGN(s
, z
);
74 else /* z is finite */
77 MPFR_SET_SIGN(s
, MPFR_MULT_SIGN(MPFR_SIGN(x
) , MPFR_SIGN(y
)));
81 /* now x and y are finite */
82 else if (MPFR_IS_INF(z
))
85 MPFR_SET_SAME_SIGN(s
, z
);
88 else if (MPFR_IS_ZERO(x
) || MPFR_IS_ZERO(y
))
93 sign_p
= MPFR_MULT_SIGN( MPFR_SIGN(x
) , MPFR_SIGN(y
) );
94 MPFR_SET_SIGN(s
,(rnd_mode
!= MPFR_RNDD
?
95 ((MPFR_IS_NEG_SIGN(sign_p
) && MPFR_IS_NEG(z
))
97 ((MPFR_IS_POS_SIGN(sign_p
) && MPFR_IS_POS(z
))
103 return mpfr_set (s
, z
, rnd_mode
);
105 else /* necessarily z is zero here */
107 MPFR_ASSERTD(MPFR_IS_ZERO(z
));
108 return mpfr_mul (s
, x
, y
, rnd_mode
);
112 /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
113 is exact, except in case of overflow or underflow. */
114 MPFR_SAVE_EXPO_MARK (expo
);
115 MPFR_GROUP_INIT_1 (group
, MPFR_PREC(x
) + MPFR_PREC(y
), u
);
117 if (MPFR_UNLIKELY (mpfr_mul (u
, x
, y
, MPFR_RNDN
)))
119 /* overflow or underflow - this case is regarded as rare, thus
120 does not need to be very efficient (even if some tests below
121 could have been done earlier).
122 It is an overflow iff u is an infinity (since MPFR_RNDN was used).
123 Alternatively, we could test the overflow flag, but in this case,
124 mpfr_clear_flags would have been necessary. */
126 if (MPFR_IS_INF (u
)) /* overflow */
128 MPFR_LOG_MSG (("Overflow on x*y\n", 0));
130 /* Let's eliminate the obvious case where x*y and z have the
131 same sign. No possible cancellation -> real overflow.
132 Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
133 then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
134 is also an overflow. */
135 if (MPFR_SIGN (u
) == MPFR_SIGN (z
) ||
136 MPFR_GET_EXP (x
) + MPFR_GET_EXP (y
) >= __gmpfr_emax
+ 3)
138 MPFR_GROUP_CLEAR (group
);
139 MPFR_SAVE_EXPO_FREE (expo
);
140 return mpfr_overflow (s
, rnd_mode
, MPFR_SIGN (z
));
143 /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
144 (x/4)*y does not overflow (let's recall that the result
145 is exact with an unbounded exponent range). It does not
146 underflow either, because x*y overflows and the exponent
147 range is large enough. */
148 inexact
= mpfr_div_2ui (u
, x
, 2, MPFR_RNDN
);
149 MPFR_ASSERTN (inexact
== 0);
150 inexact
= mpfr_mul (u
, u
, y
, MPFR_RNDN
);
151 MPFR_ASSERTN (inexact
== 0);
153 /* Now, we need to add z/4... But it may underflow! */
157 MPFR_BLOCK_DECL (flags
);
159 if (MPFR_GET_EXP (u
) > MPFR_GET_EXP (z
) &&
160 MPFR_GET_EXP (u
) - MPFR_GET_EXP (z
) > MPFR_PREC (u
))
162 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
167 mpfr_init2 (zo4
, MPFR_PREC (z
));
168 if (mpfr_div_2ui (zo4
, z
, 2, MPFR_RNDZ
))
170 /* The division by 4 underflowed! */
171 MPFR_ASSERTN (0); /* TODO... */
176 /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
177 following addition would give the same result). */
178 MPFR_BLOCK (flags
, inexact
= mpfr_add (s
, u
, zz
, rnd_mode
));
179 /* u and zz have different signs, so that an overflow
180 is not possible. But an underflow is theoretically
182 if (MPFR_UNDERFLOW (flags
))
184 MPFR_ASSERTN (zz
!= z
);
185 MPFR_ASSERTN (0); /* TODO... */
186 mpfr_clears (zo4
, u
, (mpfr_ptr
) 0);
194 MPFR_GROUP_CLEAR (group
);
195 MPFR_ASSERTN (! MPFR_OVERFLOW (flags
));
196 inex2
= mpfr_mul_2ui (s
, s
, 2, rnd_mode
);
197 if (inex2
) /* overflow */
200 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, __gmpfr_flags
);
206 else /* underflow: one has |xy| < 2^(emin-1). */
208 unsigned long scale
= 0;
215 MPFR_LOG_MSG (("Underflow on x*y\n", 0));
217 /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
218 (the + 1 on MPFR_PREC (s) is necessary because the exponent
219 of the result can be EXP(z) - 1). */
220 diffexp
= MPFR_GET_EXP (z
) - __gmpfr_emin
;
221 pzs
= MAX (MPFR_PREC (z
), MPFR_PREC (s
) + 1);
222 MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC
"d pzs=%Pd\n",
228 MPFR_BLOCK_DECL (flags
);
230 uscale
= (mpfr_uexp_t
) pzs
- diffexp
+ 1;
231 MPFR_ASSERTN (uscale
> 0);
232 MPFR_ASSERTN (uscale
<= ULONG_MAX
);
234 mpfr_init2 (scaled_z
, MPFR_PREC (z
));
235 inexact
= mpfr_mul_2ui (scaled_z
, z
, scale
, MPFR_RNDN
);
236 MPFR_ASSERTN (inexact
== 0); /* TODO: overflow case */
238 /* Now we need to recompute u = xy * 2^scale. */
240 if (MPFR_GET_EXP (x
) < MPFR_GET_EXP (y
))
242 mpfr_init2 (scaled_v
, MPFR_PREC (x
));
243 mpfr_mul_2ui (scaled_v
, x
, scale
, MPFR_RNDN
);
244 mpfr_mul (u
, scaled_v
, y
, MPFR_RNDN
);
248 mpfr_init2 (scaled_v
, MPFR_PREC (y
));
249 mpfr_mul_2ui (scaled_v
, y
, scale
, MPFR_RNDN
);
250 mpfr_mul (u
, x
, scaled_v
, MPFR_RNDN
);
252 mpfr_clear (scaled_v
);
253 MPFR_ASSERTN (! MPFR_OVERFLOW (flags
));
254 xy_underflows
= MPFR_UNDERFLOW (flags
);
262 MPFR_LOG_MSG (("scale=%lu xy_underflows=%d\n",
263 scale
, xy_underflows
));
267 /* Let's replace xy by sign(xy) * 2^(emin-1). */
268 MPFR_PREC (u
) = MPFR_PREC_MIN
;
269 mpfr_setmin (u
, __gmpfr_emin
);
270 MPFR_SET_SIGN (u
, MPFR_MULT_SIGN (MPFR_SIGN (x
),
275 MPFR_BLOCK_DECL (flags
);
277 MPFR_BLOCK (flags
, inexact
= mpfr_add (s
, u
, new_z
, rnd_mode
));
278 MPFR_LOG_MSG (("inexact=%d\n", inexact
));
279 MPFR_GROUP_CLEAR (group
);
284 mpfr_clear (scaled_z
);
285 /* Here an overflow is theoretically possible, in which case
286 the result may be wrong, hence the assert. An underflow
287 is not possible, but let's check that anyway. */
288 MPFR_ASSERTN (! MPFR_OVERFLOW (flags
)); /* TODO... */
289 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags
)); /* not possible */
290 if (rnd_mode
== MPFR_RNDN
&&
291 MPFR_GET_EXP (s
) == __gmpfr_emin
- 1 + scale
&&
292 mpfr_powerof2_raw (s
))
294 MPFR_LOG_MSG (("Double rounding\n", 0));
295 rnd_mode
= (MPFR_IS_NEG (s
) ? inexact
<= 0 : inexact
>= 0)
296 ? MPFR_RNDZ
: MPFR_RNDA
;
298 inex2
= mpfr_div_2ui (s
, s
, scale
, rnd_mode
);
299 MPFR_LOG_MSG (("inex2=%d\n", inex2
));
300 if (inex2
) /* underflow */
305 /* FIXME/TODO: I'm not sure that the following is correct.
306 Check for possible spurious exceptions due to intermediate
308 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, __gmpfr_flags
);
313 inexact
= mpfr_add (s
, u
, z
, rnd_mode
);
314 MPFR_GROUP_CLEAR (group
);
315 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, __gmpfr_flags
);
317 MPFR_SAVE_EXPO_FREE (expo
);
318 return mpfr_check_range (s
, inexact
, rnd_mode
);