1 /* Compute x * y + z as ternary operation.
2 Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by David Flaherty <flaherty@linux.vnet.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
23 #include <math_private.h>
24 #include <math_ldbl_opt.h>
25 #include <mul_split.h>
28 /* Calculate X + Y exactly and store the result in *HI + *LO. It is
29 given that |X| >= |Y| and the values are small enough that no
33 add_split (double *hi
, double *lo
, double x
, double y
)
35 /* Apply Dekker's algorithm. */
40 /* Value with extended range, used in intermediate computations. */
43 /* Value in [0.5, 1), as from frexp, or 0. */
45 /* Exponent of power of 2 it is multiplied by, or 0 for zero. */
49 /* Store D as an ext_val value. */
52 store_ext_val (ext_val
*v
, double d
)
54 v
->val
= __frexp (d
, &v
->exp
);
57 /* Store X * Y as ext_val values *V0 and *V1. */
60 mul_ext_val (ext_val
*v0
, ext_val
*v1
, double x
, double y
)
63 x
= __frexp (x
, &xexp
);
64 y
= __frexp (y
, &yexp
);
66 mul_split (&hi
, &lo
, x
, y
);
67 store_ext_val (v0
, hi
);
69 v0
->exp
+= xexp
+ yexp
;
70 store_ext_val (v1
, lo
);
72 v1
->exp
+= xexp
+ yexp
;
75 /* Compare absolute values of ext_val values pointed to by P and Q for
79 compare (const void *p
, const void *q
)
81 const ext_val
*pe
= p
;
82 const ext_val
*qe
= q
;
84 return qe
->val
== 0 ? 0 : -1;
85 else if (qe
->val
== 0)
87 else if (pe
->exp
< qe
->exp
)
89 else if (pe
->exp
> qe
->exp
)
93 double pd
= fabs (pe
->val
);
94 double qd
= fabs (qe
->val
);
104 /* Calculate *X + *Y exactly, storing the high part in *X (rounded to
105 nearest) and the low part in *Y. It is given that |X| >= |Y|. */
108 add_split_ext (ext_val
*x
, ext_val
*y
)
110 int xexp
= x
->exp
, yexp
= y
->exp
;
111 if (y
->val
== 0 || xexp
- yexp
> 53)
114 double lo
= __scalbn (y
->val
, yexp
- xexp
);
115 add_split (&hi
, &lo
, hi
, lo
);
116 store_ext_val (x
, hi
);
119 store_ext_val (y
, lo
);
125 __fmal (long double x
, long double y
, long double z
)
127 double xhi
, xlo
, yhi
, ylo
, zhi
, zlo
;
129 int xexp
, yexp
, zexp
;
132 ldbl_unpack (x
, &xhi
, &xlo
);
133 EXTRACT_WORDS64 (hx
, xhi
);
134 xexp
= (hx
& 0x7ff0000000000000LL
) >> 52;
135 ldbl_unpack (y
, &yhi
, &ylo
);
136 EXTRACT_WORDS64 (hy
, yhi
);
137 yexp
= (hy
& 0x7ff0000000000000LL
) >> 52;
138 ldbl_unpack (z
, &zhi
, &zlo
);
139 EXTRACT_WORDS64 (hz
, zhi
);
140 zexp
= (hz
& 0x7ff0000000000000LL
) >> 52;
142 /* If z is Inf or NaN, but x and y are finite, avoid any exceptions
143 from computing x * y. */
144 if (zexp
== 0x7ff && xexp
!= 0x7ff && yexp
!= 0x7ff)
147 /* If z is zero and x are y are nonzero, compute the result as x * y
148 to avoid the wrong sign of a zero result if x * y underflows to
150 if (z
== 0 && x
!= 0 && y
!= 0)
153 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as x * y
155 if (xexp
== 0x7ff || yexp
== 0x7ff || zexp
== 0x7ff
160 SET_RESTORE_ROUND (FE_TONEAREST
);
163 store_ext_val (&vals
[0], zhi
);
164 store_ext_val (&vals
[1], zlo
);
165 mul_ext_val (&vals
[2], &vals
[3], xhi
, yhi
);
166 mul_ext_val (&vals
[4], &vals
[5], xhi
, ylo
);
167 mul_ext_val (&vals
[6], &vals
[7], xlo
, yhi
);
168 mul_ext_val (&vals
[8], &vals
[9], xlo
, ylo
);
169 qsort (vals
, 10, sizeof (ext_val
), compare
);
170 /* Add up the values so that each element of VALS has absolute
171 value at most equal to the last set bit of the next nonzero
173 for (size_t i
= 0; i
<= 8; i
++)
175 add_split_ext (&vals
[i
+ 1], &vals
[i
]);
176 qsort (vals
+ i
+ 1, 9 - i
, sizeof (ext_val
), compare
);
178 /* Add up the values in the other direction, so that each element
179 of VALS has absolute value less than 5ulp of the next
182 for (size_t i
= 1; i
<= 9; i
++)
184 if (vals
[dstpos
].val
== 0)
186 vals
[dstpos
] = vals
[9 - i
];
192 add_split_ext (&vals
[dstpos
], &vals
[9 - i
]);
193 if (vals
[9 - i
].val
!= 0)
195 if (9 - i
< dstpos
- 1)
197 vals
[dstpos
- 1] = vals
[9 - i
];
205 /* If the result is an exact zero, it results from adding two
206 values with opposite signs; recompute in the original rounding
208 if (vals
[9].val
== 0)
210 /* Adding the top three values will now give a result as accurate
211 as the underlying long double arithmetic. */
212 add_split_ext (&vals
[9], &vals
[8]);
213 if (compare (&vals
[8], &vals
[7]) < 0)
215 ext_val tmp
= vals
[7];
219 add_split_ext (&vals
[8], &vals
[7]);
220 add_split_ext (&vals
[9], &vals
[8]);
221 if (vals
[9].exp
> DBL_MAX_EXP
|| vals
[9].exp
< DBL_MIN_EXP
)
223 /* Overflow or underflow, with the result depending on the
224 original rounding mode, but not on the low part computed
226 scale_val
= vals
[9].val
;
227 scale_exp
= vals
[9].exp
;
230 double hi
= __scalbn (vals
[9].val
, vals
[9].exp
);
231 double lo
= __scalbn (vals
[8].val
, vals
[8].exp
);
232 /* It is possible that the low part became subnormal and was
233 rounded so that the result is no longer canonical. */
234 ldbl_canonicalize (&hi
, &lo
);
235 long double ret
= ldbl_pack (hi
, lo
);
236 math_check_force_underflow (ret
);
241 scale_val
= math_opt_barrier (scale_val
);
242 scale_val
= __scalbn (scale_val
, scale_exp
);
243 if (fabs (scale_val
) == DBL_MAX
)
244 return __copysignl (LDBL_MAX
, scale_val
);
245 math_check_force_underflow (scale_val
);
250 zero
= math_opt_barrier (zero
);
254 long_double_symbol (libm
, __fmal
, fmal
);
256 long_double_symbol (libc
, __fmal
, fmal
);