1 /* Compute x * y + z as ternary operation.
2 Copyright (C) 2010-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #define NO_MATH_REDIRECT
21 #define f64xfmaf128 __hide_f64xfmaf128
26 #include <math-barriers.h>
27 #include <math_private.h>
28 #include <libm-alias-ldouble.h>
29 #include <math-narrow-alias.h>
31 #include <math-use-builtins.h>
33 /* This implementation uses rounding to odd to avoid problems with
34 double rounding. See a paper by Boldo and Melquiond:
35 http://www.lri.fr/~melquion/doc/08-tc.pdf */
38 __fmal (_Float128 x
, _Float128 y
, _Float128 z
)
41 return __builtin_fmal (x
, y
, z
);
43 union ieee854_long_double u
, v
, w
;
48 if (__builtin_expect (u
.ieee
.exponent
+ v
.ieee
.exponent
49 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
51 || __builtin_expect (u
.ieee
.exponent
>= 0x7fff - LDBL_MANT_DIG
, 0)
52 || __builtin_expect (v
.ieee
.exponent
>= 0x7fff - LDBL_MANT_DIG
, 0)
53 || __builtin_expect (w
.ieee
.exponent
>= 0x7fff - LDBL_MANT_DIG
, 0)
54 || __builtin_expect (u
.ieee
.exponent
+ v
.ieee
.exponent
55 <= IEEE854_LONG_DOUBLE_BIAS
+ LDBL_MANT_DIG
, 0))
57 /* If z is Inf, but x and y are finite, the result should be
59 if (w
.ieee
.exponent
== 0x7fff
60 && u
.ieee
.exponent
!= 0x7fff
61 && v
.ieee
.exponent
!= 0x7fff)
63 /* If z is zero and x are y are nonzero, compute the result
64 as x * y to avoid the wrong sign of a zero result if x * y
66 if (z
== 0 && x
!= 0 && y
!= 0)
68 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
70 if (u
.ieee
.exponent
== 0x7fff
71 || v
.ieee
.exponent
== 0x7fff
72 || w
.ieee
.exponent
== 0x7fff
76 /* If fma will certainly overflow, compute as x * y. */
77 if (u
.ieee
.exponent
+ v
.ieee
.exponent
78 > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
)
80 /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
81 result nor whether there is underflow depends on its exact
82 value, only on its sign. */
83 if (u
.ieee
.exponent
+ v
.ieee
.exponent
84 < IEEE854_LONG_DOUBLE_BIAS
- LDBL_MANT_DIG
- 2)
86 int neg
= u
.ieee
.negative
^ v
.ieee
.negative
;
87 _Float128 tiny
= neg
? L(-0x1p
-16494) : L(0x1p
-16494);
88 if (w
.ieee
.exponent
>= 3)
90 /* Scaling up, adding TINY and scaling down produces the
91 correct result, because in round-to-nearest mode adding
92 TINY has no effect and in other modes double rounding is
93 harmless. But it may not produce required underflow
95 v
.d
= z
* L(0x1p
114) + tiny
;
96 if (TININESS_AFTER_ROUNDING
97 ? v
.ieee
.exponent
< 115
98 : (w
.ieee
.exponent
== 0
99 || (w
.ieee
.exponent
== 1
100 && w
.ieee
.negative
!= neg
101 && w
.ieee
.mantissa3
== 0
102 && w
.ieee
.mantissa2
== 0
103 && w
.ieee
.mantissa1
== 0
104 && w
.ieee
.mantissa0
== 0)))
106 _Float128 force_underflow
= x
* y
;
107 math_force_eval (force_underflow
);
109 return v
.d
* L(0x1p
-114);
111 if (u
.ieee
.exponent
+ v
.ieee
.exponent
112 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
- LDBL_MANT_DIG
)
114 /* Compute 1p-113 times smaller result and multiply
116 if (u
.ieee
.exponent
> v
.ieee
.exponent
)
117 u
.ieee
.exponent
-= LDBL_MANT_DIG
;
119 v
.ieee
.exponent
-= LDBL_MANT_DIG
;
120 /* If x + y exponent is very large and z exponent is very small,
121 it doesn't matter if we don't adjust it. */
122 if (w
.ieee
.exponent
> LDBL_MANT_DIG
)
123 w
.ieee
.exponent
-= LDBL_MANT_DIG
;
126 else if (w
.ieee
.exponent
>= 0x7fff - LDBL_MANT_DIG
)
129 If z exponent is very large and x and y exponents are
130 very small, adjust them up to avoid spurious underflows,
132 if (u
.ieee
.exponent
+ v
.ieee
.exponent
133 <= IEEE854_LONG_DOUBLE_BIAS
+ 2 * LDBL_MANT_DIG
)
135 if (u
.ieee
.exponent
> v
.ieee
.exponent
)
136 u
.ieee
.exponent
+= 2 * LDBL_MANT_DIG
+ 2;
138 v
.ieee
.exponent
+= 2 * LDBL_MANT_DIG
+ 2;
140 else if (u
.ieee
.exponent
> v
.ieee
.exponent
)
142 if (u
.ieee
.exponent
> LDBL_MANT_DIG
)
143 u
.ieee
.exponent
-= LDBL_MANT_DIG
;
145 else if (v
.ieee
.exponent
> LDBL_MANT_DIG
)
146 v
.ieee
.exponent
-= LDBL_MANT_DIG
;
147 w
.ieee
.exponent
-= LDBL_MANT_DIG
;
150 else if (u
.ieee
.exponent
>= 0x7fff - LDBL_MANT_DIG
)
152 u
.ieee
.exponent
-= LDBL_MANT_DIG
;
154 v
.ieee
.exponent
+= LDBL_MANT_DIG
;
158 else if (v
.ieee
.exponent
>= 0x7fff - LDBL_MANT_DIG
)
160 v
.ieee
.exponent
-= LDBL_MANT_DIG
;
162 u
.ieee
.exponent
+= LDBL_MANT_DIG
;
166 else /* if (u.ieee.exponent + v.ieee.exponent
167 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
169 if (u
.ieee
.exponent
> v
.ieee
.exponent
)
170 u
.ieee
.exponent
+= 2 * LDBL_MANT_DIG
+ 2;
172 v
.ieee
.exponent
+= 2 * LDBL_MANT_DIG
+ 2;
173 if (w
.ieee
.exponent
<= 4 * LDBL_MANT_DIG
+ 6)
176 w
.ieee
.exponent
+= 2 * LDBL_MANT_DIG
+ 2;
181 /* Otherwise x * y should just affect inexact
189 /* Ensure correct sign of exact 0 + 0. */
190 if (__glibc_unlikely ((x
== 0 || y
== 0) && z
== 0))
192 x
= math_opt_barrier (x
);
198 fesetround (FE_TONEAREST
);
200 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
201 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
202 _Float128 x1
= x
* C
;
203 _Float128 y1
= y
* C
;
204 _Float128 m1
= x
* y
;
207 _Float128 x2
= x
- x1
;
208 _Float128 y2
= y
- y1
;
209 _Float128 m2
= (((x1
* y1
- m1
) + x1
* y2
) + x2
* y1
) + x2
* y2
;
211 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
212 _Float128 a1
= z
+ m1
;
213 _Float128 t1
= a1
- z
;
214 _Float128 t2
= a1
- t1
;
217 _Float128 a2
= t1
+ t2
;
218 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
219 math_force_eval (m2
);
220 math_force_eval (a2
);
221 feclearexcept (FE_INEXACT
);
223 /* If the result is an exact zero, ensure it has the correct sign. */
224 if (a1
== 0 && m2
== 0)
227 /* Ensure that round-to-nearest value of z + m1 is not reused. */
228 z
= math_opt_barrier (z
);
232 fesetround (FE_TOWARDZERO
);
233 /* Perform m2 + a2 addition with round to odd. */
236 if (__glibc_likely (adjust
== 0))
238 if ((u
.ieee
.mantissa3
& 1) == 0 && u
.ieee
.exponent
!= 0x7fff)
239 u
.ieee
.mantissa3
|= fetestexcept (FE_INEXACT
) != 0;
241 /* Result is a1 + u.d. */
244 else if (__glibc_likely (adjust
> 0))
246 if ((u
.ieee
.mantissa3
& 1) == 0 && u
.ieee
.exponent
!= 0x7fff)
247 u
.ieee
.mantissa3
|= fetestexcept (FE_INEXACT
) != 0;
249 /* Result is a1 + u.d, scaled up. */
250 return (a1
+ u
.d
) * L(0x1p
113);
254 if ((u
.ieee
.mantissa3
& 1) == 0)
255 u
.ieee
.mantissa3
|= fetestexcept (FE_INEXACT
) != 0;
257 /* Ensure the addition is not scheduled after fetestexcept call. */
258 math_force_eval (v
.d
);
259 int j
= fetestexcept (FE_INEXACT
) != 0;
261 /* Ensure the following computations are performed in default rounding
262 mode instead of just reusing the round to zero computation. */
263 asm volatile ("" : "=m" (u
) : "m" (u
));
264 /* If a1 + u.d is exact, the only rounding happens during
267 return v
.d
* L(0x1p
-228);
268 /* If result rounded to zero is not subnormal, no double
269 rounding will occur. */
270 if (v
.ieee
.exponent
> 228)
271 return (a1
+ u
.d
) * L(0x1p
-228);
272 /* If v.d * 0x1p-228L with round to zero is a subnormal above
273 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
274 down just by 1 bit, which means v.ieee.mantissa3 |= j would
275 change the round bit, not sticky or guard bit.
276 v.d * 0x1p-228L never normalizes by shifting up,
277 so round bit plus sticky bit should be already enough
278 for proper rounding. */
279 if (v
.ieee
.exponent
== 228)
281 /* If the exponent would be in the normal range when
282 rounding to normal precision with unbounded exponent
283 range, the exact result is known and spurious underflows
284 must be avoided on systems detecting tininess after
286 if (TININESS_AFTER_ROUNDING
)
289 if (w
.ieee
.exponent
== 229)
290 return w
.d
* L(0x1p
-228);
292 /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
293 v.ieee.mantissa3 & 1 is the round bit and j is our sticky
296 w
.ieee
.mantissa3
= ((v
.ieee
.mantissa3
& 3) << 1) | j
;
297 w
.ieee
.negative
= v
.ieee
.negative
;
298 v
.ieee
.mantissa3
&= ~3U;
303 v
.ieee
.mantissa3
|= j
;
304 return v
.d
* L(0x1p
-228);
306 #endif /* ! USE_FMAL_BUILTIN */
308 libm_alias_ldouble (__fma
, fma
)
309 libm_alias_ldouble_narrow (__fma
, fma
)