/* mpfr_fma -- Floating multiply-add

Copyright 2001-2002, 2004, 2006-2015 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#include "mpfr-impl.h"

/* The fused-multiply-add (fma) of x, y and z is defined by:
   fma(x,y,z) = x*y + z
   computed as if in infinite precision, then rounded once in the
   direction rnd_mode. */
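
/* Special cases follow the usual IEEE 754 fma rules, as implemented
   below: any NaN operand, as well as Inf*0, 0*Inf and Inf-Inf, gives
   NaN; an infinite result takes the sign of the dominant term; and
   when x*y and z are both zero, the sign of the zero result depends
   on the operand signs and on the rounding mode. */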

int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;
  mpfr_t u;
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL(group);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
                     MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    {
      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
        {
          MPFR_SET_NAN(s);
          MPFR_RET_NAN;
        }
      /* now neither x, y nor z is NaN */
      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
        {
          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
          if ((MPFR_IS_ZERO(y)) ||
              (MPFR_IS_ZERO(x)) ||
              (MPFR_IS_INF(z) &&
               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
            {
              MPFR_SET_NAN(s);
              MPFR_RET_NAN;
            }
          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
            {
              MPFR_SET_INF(s);
              MPFR_SET_SAME_SIGN(s, z);
              MPFR_RET(0);
            }
          else /* z is finite */
            {
              MPFR_SET_INF(s);
              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
              MPFR_RET(0);
            }
        }
      /* now x and y are finite */
      else if (MPFR_IS_INF(z))
        {
          MPFR_SET_INF(s);
          MPFR_SET_SAME_SIGN(s, z);
          MPFR_RET(0);
        }
      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
        {
          if (MPFR_IS_ZERO(z))
            {
              int sign_p;
              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
                                ? -1 : 1) :
                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
                                ? 1 : -1)));
              MPFR_SET_ZERO(s);
              MPFR_RET(0);
            }
          else
            return mpfr_set (s, z, rnd_mode);
        }
      else /* necessarily z is zero here */
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(z));
          return mpfr_mul (s, x, y, rnd_mode);
        }
    }

  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
  MPFR_SAVE_EXPO_MARK (expo);
  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
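
  /* For instance, with two 53-bit operands u gets 53 + 53 = 106 bits,
     which is always enough to represent their product exactly as long
     as the exponent stays within range. */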

  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */
      if (MPFR_IS_INF (u)) /* overflow */
        {
          MPFR_LOG_MSG (("Overflow on x*y\n", 0));

          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
             is also an overflow. */
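          /* (Spelling the second bound out: each significand is at
             least 1/2 in magnitude, so |x*y| >= 2^(E(x)+E(y)-2), which
             is >= 2^(emax+1) when E(x)+E(y) >= emax+3; then
             |x*y + z| > 2^(emax+1) - 2^emax = 2^emax.) */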
          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
            {
              MPFR_GROUP_CLEAR (group);
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
            }

          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
             (x/4)*y does not overflow (let's recall that the result
             is exact with an unbounded exponent range). It does not
             underflow either, because x*y overflows and the exponent
             range is large enough. */
          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
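
          /* At this point u = x*y/4 exactly. The strategy is to form
             (x*y + z)/4, which cannot overflow since u and z/4 have
             opposite signs here, and to scale the sum back by 4 at the
             end (the mpfr_mul_2ui below), so that a genuine overflow
             is detected only on that final operation. */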

          /* Now, we need to add z/4... But it may underflow! */
          {
            mpfr_t zo4;
            mpfr_srcptr zz;
            MPFR_BLOCK_DECL (flags);

            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
              {
                /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
                zz = z;
              }
            else
              {
                mpfr_init2 (zo4, MPFR_PREC (z));
                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
                  {
                    /* The division by 4 underflowed! */
                    MPFR_ASSERTN (0); /* TODO... */
                  }
                zz = zo4;
              }

            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
               following addition would give the same result). */
            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
            /* u and zz have different signs, so that an overflow
               is not possible. But an underflow is theoretically
               possible! */
            if (MPFR_UNDERFLOW (flags))
              {
                MPFR_ASSERTN (zz != z);
                MPFR_ASSERTN (0); /* TODO... */
                mpfr_clears (zo4, u, (mpfr_ptr) 0);
              }
            else
              {
                int inex2;

                if (zz != z)
                  mpfr_clear (zo4);
                MPFR_GROUP_CLEAR (group);
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
                if (inex2) /* overflow */
                  {
                    inexact = inex2;
                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
                  }
                goto end;
              }
          }
        }
      else /* underflow: one has |xy| < 2^(emin-1). */
        {
          unsigned long scale = 0;
          mpfr_t scaled_z;
          mpfr_srcptr new_z;
          mpfr_exp_t diffexp;
          mpfr_prec_t pzs;
          int xy_underflows;

          MPFR_LOG_MSG (("Underflow on x*y\n", 0));

          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
             (the + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1). */
          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
          MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d pzs=%Pd\n",
                         diffexp, pzs));
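          /* A worked instance with hypothetical values: PREC(z) =
             PREC(s) = 53 and EXP(z) = emin + 10 give pzs = 54 and
             diffexp = 10 <= pzs, hence scale = 54 - 10 + 1 = 45 below;
             scaled_z then has exponent emin + 55, so that
             ulp(scaled_z) = 2^(emin+2) > 2^emin as required. */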
          if (diffexp <= pzs)
            {
              mpfr_uexp_t uscale;
              mpfr_t scaled_v;
              MPFR_BLOCK_DECL (flags);

              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
              MPFR_ASSERTN (uscale > 0);
              MPFR_ASSERTN (uscale <= ULONG_MAX);
              scale = uscale;
              mpfr_init2 (scaled_z, MPFR_PREC (z));
              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
              MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
              new_z = scaled_z;
              /* Now we need to recompute u = xy * 2^scale. */
              MPFR_BLOCK (flags,
                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (x));
                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
                            }
                          else
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (y));
                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
                            });
              mpfr_clear (scaled_v);
              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
              xy_underflows = MPFR_UNDERFLOW (flags);
            }
          else
            {
              new_z = z;
              xy_underflows = 1;
            }
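
          /* In this branch EXP(z) - emin > pzs >= PREC(z), so
             ulp(z) > 2^emin already holds without any scaling; and
             since |x*y| < 2^(emin-1) < ulp(z), the product can be
             treated directly as an underflowing quantity. */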

          MPFR_LOG_MSG (("scale=%lu xy_underflows=%d\n",
                         scale, xy_underflows));

          if (xy_underflows)
            {
              /* Let's replace xy by sign(xy) * 2^(emin-1). */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
            }
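
          /* Informal justification for the surrogate: the exact product
             is nonzero with magnitude below 2^(emin-1), while the term
             it is added to has ulp > 2^emin, i.e. more than twice as
             large; hence only the sign of the product and the fact that
             it is nonzero can affect the rounded sum, and the minimum
             value 2^(emin-1) with the correct sign behaves the same. */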

          {
            MPFR_BLOCK_DECL (flags);

            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
            MPFR_LOG_MSG (("inexact=%d\n", inexact));
            MPFR_GROUP_CLEAR (group);
            if (scale != 0)
              {
                int inex2;

                mpfr_clear (scaled_z);
                /* Here an overflow is theoretically possible, in which case
                   the result may be wrong, hence the assert. An underflow
                   is not possible, but let's check that anyway. */
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
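                /* If the first rounding (in mpfr_add above) landed
                   exactly on the power of two 2^(emin-1+scale), the
                   division by 2^scale below would round a second time
                   at the underflow threshold. For MPFR_RNDN the fix
                   below replaces that second rounding by a directed
                   one consistent with the direction of the first,
                   avoiding a double rounding error. */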
                if (rnd_mode == MPFR_RNDN &&
                    MPFR_GET_EXP (s) == __gmpfr_emin - 1 + scale &&
                    mpfr_powerof2_raw (s))
                  {
                    MPFR_LOG_MSG (("Double rounding\n", 0));
                    rnd_mode = (MPFR_IS_NEG (s) ? inexact <= 0 : inexact >= 0)
                      ? MPFR_RNDZ : MPFR_RNDA;
                  }
                inex2 = mpfr_div_2ui (s, s, scale, rnd_mode);
                MPFR_LOG_MSG (("inex2=%d\n", inex2));
                if (inex2) /* underflow */
                  inexact = inex2;
              }
          }

          /* FIXME/TODO: I'm not sure that the following is correct.
             Check for possible spurious exceptions due to intermediate
             computations. */
          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
          goto end;
        }
    }

  inexact = mpfr_add (s, u, z, rnd_mode);
  MPFR_GROUP_CLEAR (group);
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 end:
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (s, inexact, rnd_mode);
}
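
/* A minimal usage sketch of mpfr_fma (illustrative only, hence kept
   under "#if 0"; it assumes the public <mpfr.h> interface and a
   standalone build against the library): */
#if 0
#include <stdio.h>
#include <mpfr.h>

int
main (void)
{
  mpfr_t s, x, y, z;
  int inexact;

  mpfr_inits2 (53, s, x, y, z, (mpfr_ptr) 0);
  mpfr_set_d (x, 1.5,  MPFR_RNDN);
  mpfr_set_d (y, 2.0,  MPFR_RNDN);
  mpfr_set_d (z, 0.25, MPFR_RNDN);

  /* s = 1.5*2.0 + 0.25 = 3.25, with a single rounding; the returned
     ternary value is 0 here since 3.25 is exactly representable. */
  inexact = mpfr_fma (s, x, y, z, MPFR_RNDN);
  mpfr_printf ("s = %Rg, inexact = %d\n", s, inexact);

  mpfr_clears (s, x, y, z, (mpfr_ptr) 0);
  return 0;
}
#endif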