2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
9 * ====================================================
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <https://www.gnu.org/licenses/>. */
33 /* __ieee754_powl(x,y) return x**y
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
68 #include <math-barriers.h>
69 #include <math_private.h>
70 #include <libm-alias-finite.h>
72 static const _Float128 bp
[] = {
78 static const _Float128 dp_h
[] = {
80 L(5.8496250072115607565592654282227158546448E-1)
83 /* Low part of log_2(1.5) */
84 static const _Float128 dp_l
[] = {
86 L(1.0579781240112554492329533686862998106046E-16)
89 static const _Float128 zero
= 0,
92 two113
= L(1.0384593717069655257060992658440192E34
),
96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
99 Peak relative error 2.3e-37 */
100 static const _Float128 LN
[] =
102 L(-3.0779177200290054398792536829702930623200E1
),
103 L(6.5135778082209159921251824580292116201640E1
),
104 L(-4.6312921812152436921591152809994014413540E1
),
105 L(1.2510208195629420304615674658258363295208E1
),
106 L(-9.9266909031921425609179910128531667336670E-1)
108 static const _Float128 LD
[] =
110 L(-5.129862866715009066465422805058933131960E1
),
111 L(1.452015077564081884387441590064272782044E2
),
112 L(-1.524043275549860505277434040464085593165E2
),
113 L(7.236063513651544224319663428634139768808E1
),
114 L(-1.494198912340228235853027849917095580053E1
)
118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
120 Peak relative error 5.7e-38 */
121 static const _Float128 PN
[] =
123 L(5.081801691915377692446852383385968225675E8
),
124 L(9.360895299872484512023336636427675327355E6
),
125 L(4.213701282274196030811629773097579432957E4
),
126 L(5.201006511142748908655720086041570288182E1
),
127 L(9.088368420359444263703202925095675982530E-3),
129 static const _Float128 PD
[] =
131 L(3.049081015149226615468111430031590411682E9
),
132 L(1.069833887183886839966085436512368982758E8
),
133 L(8.259257717868875207333991924545445705394E5
),
134 L(1.872583833284143212651746812884298360922E3
),
138 static const _Float128
140 lg2
= L(6.9314718055994530941723212145817656807550E-1),
141 lg2_h
= L(6.9314718055994528622676398299518041312695E-1),
142 lg2_l
= L(2.3190468138462996154948554638754786504121E-17),
143 ovt
= L(8.0085662595372944372e-0017),
145 cp
= L(9.6179669392597560490661645400126142495110E-1),
146 cp_h
= L(9.6179669392597555432899980587535537779331E-1),
147 cp_l
= L(5.0577616648125906047157785230014751039424E-17);
150 __ieee754_powl (_Float128 x
, _Float128 y
)
152 _Float128 z
, ax
, z_h
, z_l
, p_h
, p_l
;
153 _Float128 y1
, t1
, t2
, r
, s
, sgn
, t
, u
, v
, w
;
154 _Float128 s2
, s_h
, s_l
, t_h
, t_l
, ay
;
155 int32_t i
, j
, k
, yisint
, n
;
158 ieee854_long_double_shape_type o
, p
, q
;
162 ix
= hx
& 0x7fffffff;
166 iy
= hy
& 0x7fffffff;
169 /* y==zero: x**0 = 1 */
170 if ((iy
| q
.parts32
.w1
| q
.parts32
.w2
| q
.parts32
.w3
) == 0
174 /* 1.0**y = 1; -1.0**+-Inf = 1 */
175 if (x
== one
&& !issignaling (y
))
177 if (x
== -1 && iy
== 0x7fff0000
178 && (q
.parts32
.w1
| q
.parts32
.w2
| q
.parts32
.w3
) == 0)
181 /* +-NaN return x+y */
182 if ((ix
> 0x7fff0000)
183 || ((ix
== 0x7fff0000)
184 && ((p
.parts32
.w1
| p
.parts32
.w2
| p
.parts32
.w3
) != 0))
186 || ((iy
== 0x7fff0000)
187 && ((q
.parts32
.w1
| q
.parts32
.w2
| q
.parts32
.w3
) != 0)))
190 /* determine if y is an odd int when x < 0
191 * yisint = 0 ... y is not an integer
192 * yisint = 1 ... y is an odd int
193 * yisint = 2 ... y is an even int
198 if (iy
>= 0x40700000) /* 2^113 */
199 yisint
= 2; /* even integer y */
200 else if (iy
>= 0x3fff0000) /* 1.0 */
213 /* special value of y */
214 if ((q
.parts32
.w1
| q
.parts32
.w2
| q
.parts32
.w3
) == 0)
216 if (iy
== 0x7fff0000) /* y is +-inf */
218 if (((ix
- 0x3fff0000) | p
.parts32
.w1
| p
.parts32
.w2
| p
.parts32
.w3
)
220 return y
- y
; /* +-1**inf is NaN */
221 else if (ix
>= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
222 return (hy
>= 0) ? y
: zero
;
223 else /* (|x|<1)**-,+inf = inf,0 */
224 return (hy
< 0) ? -y
: zero
;
226 if (iy
== 0x3fff0000)
233 if (hy
== 0x40000000)
234 return x
* x
; /* y is 2 */
235 if (hy
== 0x3ffe0000)
237 if (hx
>= 0) /* x >= +0 */
243 /* special value of x */
244 if ((p
.parts32
.w1
| p
.parts32
.w2
| p
.parts32
.w3
) == 0)
246 if (ix
== 0x7fff0000 || ix
== 0 || ix
== 0x3fff0000)
248 z
= ax
; /*x is +-0,+-inf,+-1 */
250 z
= one
/ z
; /* z = (1/|x|) */
253 if (((ix
- 0x3fff0000) | yisint
) == 0)
255 z
= (z
- z
) / (z
- z
); /* (-1)**non-int is NaN */
257 else if (yisint
== 1)
258 z
= -z
; /* (x<0)**odd = -(|x|**odd) */
264 /* (x<0)**(non-int) is NaN */
265 if (((((uint32_t) hx
>> 31) - 1) | yisint
) == 0)
266 return (x
- x
) / (x
- x
);
268 /* sgn (sign of result -ve**odd) = -1 else = 1 */
270 if (((((uint32_t) hx
>> 31) - 1) | (yisint
- 1)) == 0)
271 sgn
= -one
; /* (-ve)**(odd int) */
274 2^-16495 = 1/2 of smallest representable value.
275 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
278 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
281 if (ix
<= 0x3ffeffff)
282 return (hy
< 0) ? huge
* huge
: tiny
* tiny
;
283 if (ix
>= 0x3fff0000)
284 return (hy
> 0) ? huge
* huge
: tiny
* tiny
;
286 /* over/underflow if x is not close to one */
288 return (hy
< 0) ? sgn
* huge
* huge
: sgn
* tiny
* tiny
;
290 return (hy
> 0) ? sgn
* huge
* huge
: sgn
* tiny
* tiny
;
295 y
= y
< 0 ? -0x1p
-128 : 0x1p
-128;
298 /* take care subnormal number */
306 n
+= ((ix
) >> 16) - 0x3fff;
308 /* determine interval */
309 ix
= j
| 0x3fff0000; /* normalize ix */
311 k
= 0; /* |x|<sqrt(3/2) */
313 k
= 1; /* |x|<sqrt(3) */
325 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
326 u
= ax
- bp
[k
]; /* bp[0]=1.0, bp[1]=1.5 */
327 v
= one
/ (ax
+ bp
[k
]);
333 o
.parts32
.w2
&= 0xf8000000;
335 /* t_h=ax+bp[k] High */
339 o
.parts32
.w2
&= 0xf8000000;
341 t_l
= ax
- (t_h
- bp
[k
]);
342 s_l
= v
* ((u
- s_h
* t_h
) - s_h
* t_l
);
343 /* compute log(ax) */
345 u
= LN
[0] + s2
* (LN
[1] + s2
* (LN
[2] + s2
* (LN
[3] + s2
* LN
[4])));
346 v
= LD
[0] + s2
* (LD
[1] + s2
* (LD
[2] + s2
* (LD
[3] + s2
* (LD
[4] + s2
))));
348 r
+= s_l
* (s_h
+ s
);
353 o
.parts32
.w2
&= 0xf8000000;
355 t_l
= r
- ((t_h
- 3.0) - s2
);
356 /* u+v = s*(1+...) */
358 v
= s_l
* t_h
+ t_l
* s
;
359 /* 2/(3log2)*(s+...) */
363 o
.parts32
.w2
&= 0xf8000000;
366 z_h
= cp_h
* p_h
; /* cp_h+cp_l = 2/(3*log2) */
367 z_l
= cp_l
* p_h
+ p_l
* cp
+ dp_l
[k
];
368 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
370 t1
= (((z_h
+ z_l
) + dp_h
[k
]) + t
);
373 o
.parts32
.w2
&= 0xf8000000;
375 t2
= z_l
- (((t1
- t
) - dp_h
[k
]) - z_h
);
377 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
381 o
.parts32
.w2
&= 0xf8000000;
383 p_l
= (y
- y1
) * t1
+ y
* t2
;
388 if (j
>= 0x400d0000) /* z >= 16384 */
391 if (((j
- 0x400d0000) | o
.parts32
.w1
| o
.parts32
.w2
| o
.parts32
.w3
) != 0)
392 return sgn
* huge
* huge
; /* overflow */
395 if (p_l
+ ovt
> z
- p_h
)
396 return sgn
* huge
* huge
; /* overflow */
399 else if ((j
& 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
402 if (((j
- 0xc00d01bc) | o
.parts32
.w1
| o
.parts32
.w2
| o
.parts32
.w3
)
404 return sgn
* tiny
* tiny
; /* underflow */
408 return sgn
* tiny
* tiny
; /* underflow */
411 /* compute 2**(p_h+p_l) */
413 k
= (i
>> 16) - 0x3fff;
416 { /* if |z| > 0.5, set n = [z+0.5] */
417 n
= floorl (z
+ L(0.5));
424 o
.parts32
.w2
&= 0xf8000000;
427 v
= (p_l
- (t
- p_h
)) * lg2
+ t
* lg2_l
;
432 u
= PN
[0] + t
* (PN
[1] + t
* (PN
[2] + t
* (PN
[3] + t
* PN
[4])));
433 v
= PD
[0] + t
* (PD
[1] + t
* (PD
[2] + t
* (PD
[3] + t
)));
435 r
= (z
* t1
) / (t1
- two
) - (w
+ z
* w
);
442 z
= __scalbnl (z
, n
); /* subnormal output */
443 _Float128 force_underflow
= z
* z
;
444 math_force_eval (force_underflow
);
453 libm_alias_finite (__ieee754_powl
, __powl
)