1 /* Implement powl for x86 using extra-precision log.
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
#include <math.h>
#include <math_private.h>
#include <stdbool.h>
/* High parts and low parts of -log (k/16), for integer k from 12 to
   24.  Entries are stored in pairs: the high part of -log (k/16) is at
   index 2 * k - 24 and the matching low part at index 2 * k - 23, so
   the table must hold exactly 13 pairs, including the all-zero pair
   for k == 16.  */

static const long double powl_log_table[] =
  {
    /* k = 12 */
    0x4.9a58844d36e49e1p-4L, -0x1.0522624fd558f574p-68L,
    /* k = 13 */
    0x3.527da7915b3c6de4p-4L, 0x1.7d4ef4b901b99b9ep-68L,
    /* k = 14 */
    0x2.22f1d044fc8f7bc8p-4L, -0x1.8e97c071a42fc388p-68L,
    /* k = 15 */
    0x1.08598b59e3a0688ap-4L, 0x3.fd9bf503372c12fcp-72L,
    /* k = 16: -log (1) is exactly zero.  */
    0x0p+0L, 0x0p+0L,
    /* k = 17 */
    -0xf.85186008b15330cp-8L, 0x1.9b47488a6687672cp-72L,
    /* k = 18 */
    -0x1.e27076e2af2e5e9ep-4L, -0xa.87ffe1fe9e155dcp-72L,
    /* k = 19 */
    -0x2.bfe60e14f27a791p-4L, 0x1.83bebf1bdb88a032p-68L,
    /* k = 20 */
    -0x3.91fef8f353443584p-4L, -0xb.b03de5ff734495cp-72L,
    /* k = 21 */
    -0x4.59d72aeae98380e8p-4L, 0xc.e0aa3be4747dc1p-72L,
    /* k = 22 */
    -0x5.1862f08717b09f4p-4L, -0x2.decdeccf1cd10578p-68L,
    /* k = 23 */
    -0x5.ce75fdaef401a738p-4L, -0x9.314feb4fbde5aaep-72L,
    /* k = 24 */
    -0x6.7cc8fb2fe612fcbp-4L, 0x2.5ca2642feb779f98p-68L,
  };
/* High 32 bits of log2 (e), and remainder rounded to 64 bits.  The
   two constants sum to log2 (e); keeping the high part short lets it
   be multiplied by another short high part without rounding.  */
static const long double log2e_hi = 0x1.71547652p+0L;
static const long double log2e_lo = 0xb.82fe1777d0ffda1p-36L;
/* Given a number with high part HI and low part LO, add the number X
   to it and store the result in *RHI and *RLO.  It is given that
   either |X| < |0.7 * HI|, or HI == LO == 0, and that the values are
   small enough that no overflow occurs.  The result does not need to
   be exact to 128 bits; 78-bit accuracy of the final accumulated
   result suffices.

   HI and LO are passed by value, so RHI and RLO may point at the very
   objects they were read from.  */

static inline void
acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
	   long double x)
{
  /* THI is the rounded sum of the high part and X; (HI - THI) + X
     recovers what that rounding dropped, and LO is folded into the
     same low-order term.  */
  long double thi = hi + x;
  long double tlo = (hi - thi) + x + lo;
  /* Renormalize into a new high/low pair.  *RHI must be stored first
     because the low part is computed from the value just written.  */
  *rhi = thi + tlo;
  *rlo = (thi - *rhi) + tlo;
}
64 extern long double __powl_helper (long double x
, long double y
);
65 libm_hidden_proto (__powl_helper
)
67 /* Given X a value that is finite and nonzero, or a NaN, and Y a
68 finite nonzero value with 0x1p-79 <= |Y| <= 0x1p78, compute X to
72 __powl_helper (long double x
, long double y
)
75 return __ieee754_expl (y
* __ieee754_logl (x
));
79 long double absy
= fabsl (y
);
84 unsigned long long yll
= absy
;
86 return __ieee754_expl (y
* __ieee754_logl (x
));
87 negate
= (yll
& 1) != 0;
94 /* We need to compute Y * log2 (X) to at least 64 bits after the
95 point for normal results (that is, to at least 78 bits
99 x_frac
= __frexpl (x
, &x_int_exponent
);
100 if (x_frac
<= 0x0.aaaaaaaaaaaaaaaap0L
) /* 2.0L / 3.0L, rounded down */
106 long double log_x_frac_hi
, log_x_frac_lo
;
107 /* Determine an initial approximation to log (X_FRAC) using
108 POWL_LOG_TABLE, and multiply by a value K/16 to reduce to an
109 interval (24/25, 26/25). */
110 int k
= (int) ((16.0L / x_frac
) + 0.5L);
111 log_x_frac_hi
= powl_log_table
[2 * k
- 24];
112 log_x_frac_lo
= powl_log_table
[2 * k
- 23];
113 long double x_frac_low
;
118 /* Mask off low 5 bits of X_FRAC so the multiplication by K/16
119 is exact. These bits are small enough that they can be
120 corrected for by adding log2 (e) * X_FRAC_LOW to the final
124 GET_LDOUBLE_WORDS (se
, i0
, i1
, x_frac
);
127 SET_LDOUBLE_WORDS (x_frac
, se
, i0
, i1
);
128 x_frac_low
-= x_frac
;
129 x_frac_low
/= x_frac
;
133 /* Now compute log (X_FRAC) for X_FRAC in (24/25, 26/25). Separate
134 W = X_FRAC - 1 into high 16 bits and remaining bits, so that
135 multiplications for low-order power series terms are exact. The
136 remaining bits are small enough that adding a 64-bit value of
137 log2 (1 + W_LO / (1 + W_HI)) will be a sufficient correction for
139 long double w
= x_frac
- 1;
140 long double w_hi
, w_lo
;
143 GET_LDOUBLE_WORDS (se
, i0
, i1
, w
);
146 SET_LDOUBLE_WORDS (w_hi
, se
, i0
, i1
);
148 long double wp
= w_hi
;
149 acc_split (&log_x_frac_hi
, &log_x_frac_lo
, log_x_frac_hi
, log_x_frac_lo
, wp
);
151 acc_split (&log_x_frac_hi
, &log_x_frac_lo
, log_x_frac_hi
, log_x_frac_lo
,
154 acc_split (&log_x_frac_hi
, &log_x_frac_lo
, log_x_frac_hi
, log_x_frac_lo
,
155 wp
* 0x0.5555p0L
); /* -W_HI**3 / 3, high part. */
156 acc_split (&log_x_frac_hi
, &log_x_frac_lo
, log_x_frac_hi
, log_x_frac_lo
,
157 wp
* 0x0.5555555555555555p
-16L); /* -W_HI**3 / 3, low part. */
159 acc_split (&log_x_frac_hi
, &log_x_frac_lo
, log_x_frac_hi
, log_x_frac_lo
,
161 /* Subsequent terms are small enough that they only need be computed
163 for (int i
= 5; i
<= 17; i
++)
166 acc_split (&log_x_frac_hi
, &log_x_frac_lo
, log_x_frac_hi
, log_x_frac_lo
,
170 /* Convert LOG_X_FRAC_HI + LOG_X_FRAC_LO to a base-2 logarithm. */
171 long double log2_x_frac_hi
, log2_x_frac_lo
;
172 long double log_x_frac_hi32
, log_x_frac_lo64
;
173 GET_LDOUBLE_WORDS (se
, i0
, i1
, log_x_frac_hi
);
175 SET_LDOUBLE_WORDS (log_x_frac_hi32
, se
, i0
, i1
);
176 log_x_frac_lo64
= (log_x_frac_hi
- log_x_frac_hi32
) + log_x_frac_lo
;
177 long double log2_x_frac_hi1
= log_x_frac_hi32
* log2e_hi
;
178 long double log2_x_frac_lo1
179 = log_x_frac_lo64
* log2e_hi
+ log_x_frac_hi
* log2e_lo
;
180 log2_x_frac_hi
= log2_x_frac_hi1
+ log2_x_frac_lo1
;
181 log2_x_frac_lo
= (log2_x_frac_hi1
- log2_x_frac_hi
) + log2_x_frac_lo1
;
183 /* Correct for the masking off of W_LO. */
184 long double log2_1p_w_lo
;
186 : "=t" (log2_1p_w_lo
)
187 : "0" (w_lo
/ (1.0L + w_hi
)), "u" (1.0L)
189 acc_split (&log2_x_frac_hi
, &log2_x_frac_lo
, log2_x_frac_hi
, log2_x_frac_lo
,
192 /* Correct for the masking off of X_FRAC_LOW. */
193 acc_split (&log2_x_frac_hi
, &log2_x_frac_lo
, log2_x_frac_hi
, log2_x_frac_lo
,
194 x_frac_low
* M_LOG2El
);
196 /* Add the integer and fractional parts of the base-2 logarithm. */
197 long double log2_x_hi
, log2_x_lo
;
198 log2_x_hi
= x_int_exponent
+ log2_x_frac_hi
;
199 log2_x_lo
= ((x_int_exponent
- log2_x_hi
) + log2_x_frac_hi
) + log2_x_frac_lo
;
201 /* Compute the base-2 logarithm of the result. */
202 long double log2_res_hi
, log2_res_lo
;
203 long double log2_x_hi32
, log2_x_lo64
;
204 GET_LDOUBLE_WORDS (se
, i0
, i1
, log2_x_hi
);
206 SET_LDOUBLE_WORDS (log2_x_hi32
, se
, i0
, i1
);
207 log2_x_lo64
= (log2_x_hi
- log2_x_hi32
) + log2_x_lo
;
208 long double y_hi32
, y_lo32
;
209 GET_LDOUBLE_WORDS (se
, i0
, i1
, y
);
211 SET_LDOUBLE_WORDS (y_hi32
, se
, i0
, i1
);
213 log2_res_hi
= log2_x_hi32
* y_hi32
;
214 log2_res_lo
= log2_x_hi32
* y_lo32
+ log2_x_lo64
* y
;
216 /* Split the base-2 logarithm of the result into integer and
218 long double log2_res_int
= __roundl (log2_res_hi
);
219 long double log2_res_frac
= log2_res_hi
- log2_res_int
+ log2_res_lo
;
220 /* If the integer part is very large, the computed fractional part
221 may be outside the valid range for f2xm1. */
222 if (fabsl (log2_res_int
) > 16500)
225 /* Compute the final result. */
227 asm ("f2xm1" : "=t" (res
) : "0" (log2_res_frac
));
231 asm ("fscale" : "=t" (res
) : "0" (res
), "u" (log2_res_int
));
235 libm_hidden_def (__powl_helper
)