2.9
[glibc/nacl-glibc.git] / sysdeps / ieee754 / ldbl-128ibm / e_powl.c
blobfeeaa8ce215451072e3e45d4874ddb0ac432c4dd
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, write to the Free Software
31 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
33 /* __ieee754_powl(x,y) return x**y
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
67 #include "math.h"
68 #include "math_private.h"
70 static const long double bp[] = {
71 1.0L,
72 1.5L,
75 /* log_2(1.5) */
76 static const long double dp_h[] = {
77 0.0,
78 5.8496250072115607565592654282227158546448E-1L
81 /* Low part of log_2(1.5) */
82 static const long double dp_l[] = {
83 0.0,
84 1.0579781240112554492329533686862998106046E-16L
87 static const long double zero = 0.0L,
88 one = 1.0L,
89 two = 2.0L,
90 two113 = 1.0384593717069655257060992658440192E34L,
91 huge = 1.0e3000L,
92 tiny = 1.0e-3000L;
94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
95 z = (x-1)/(x+1)
96 1 <= x <= 1.25
97 Peak relative error 2.3e-37 */
98 static const long double LN[] =
100 -3.0779177200290054398792536829702930623200E1L,
101 6.5135778082209159921251824580292116201640E1L,
102 -4.6312921812152436921591152809994014413540E1L,
103 1.2510208195629420304615674658258363295208E1L,
104 -9.9266909031921425609179910128531667336670E-1L
106 static const long double LD[] =
108 -5.129862866715009066465422805058933131960E1L,
109 1.452015077564081884387441590064272782044E2L,
110 -1.524043275549860505277434040464085593165E2L,
111 7.236063513651544224319663428634139768808E1L,
112 -1.494198912340228235853027849917095580053E1L
113 /* 1.0E0 */
116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
117 0 <= x <= 0.5
118 Peak relative error 5.7e-38 */
119 static const long double PN[] =
121 5.081801691915377692446852383385968225675E8L,
122 9.360895299872484512023336636427675327355E6L,
123 4.213701282274196030811629773097579432957E4L,
124 5.201006511142748908655720086041570288182E1L,
125 9.088368420359444263703202925095675982530E-3L,
127 static const long double PD[] =
129 3.049081015149226615468111430031590411682E9L,
130 1.069833887183886839966085436512368982758E8L,
131 8.259257717868875207333991924545445705394E5L,
132 1.872583833284143212651746812884298360922E3L,
133 /* 1.0E0 */
136 static const long double
137 /* ln 2 */
138 lg2 = 6.9314718055994530941723212145817656807550E-1L,
139 lg2_h = 6.9314718055994528622676398299518041312695E-1L,
140 lg2_l = 2.3190468138462996154948554638754786504121E-17L,
141 ovt = 8.0085662595372944372e-0017L,
142 /* 2/(3*log(2)) */
143 cp = 9.6179669392597560490661645400126142495110E-1L,
144 cp_h = 9.6179669392597555432899980587535537779331E-1L,
145 cp_l = 5.0577616648125906047157785230014751039424E-17L;
147 #ifdef __STDC__
148 long double
149 __ieee754_powl (long double x, long double y)
150 #else
151 long double
152 __ieee754_powl (x, y)
153 long double x, y;
154 #endif
156 long double z, ax, z_h, z_l, p_h, p_l;
157 long double y1, t1, t2, r, s, t, u, v, w;
158 long double s2, s_h, s_l, t_h, t_l;
159 int32_t i, j, k, yisint, n;
160 u_int32_t ix, iy;
161 int32_t hx, hy;
162 ieee854_long_double_shape_type o, p, q;
164 p.value = x;
165 hx = p.parts32.w0;
166 ix = hx & 0x7fffffff;
168 q.value = y;
169 hy = q.parts32.w0;
170 iy = hy & 0x7fffffff;
173 /* y==zero: x**0 = 1 */
174 if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
175 return one;
177 /* 1.0**y = 1; -1.0**+-Inf = 1 */
178 if (x == one)
179 return one;
180 if (x == -1.0L && iy == 0x7ff00000
181 && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
182 return one;
184 /* +-NaN return x+y */
185 if ((ix > 0x7ff00000)
186 || ((ix == 0x7ff00000)
187 && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
188 || (iy > 0x7ff00000)
189 || ((iy == 0x7ff00000)
190 && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
191 return x + y;
193 /* determine if y is an odd int when x < 0
194 * yisint = 0 ... y is not an integer
195 * yisint = 1 ... y is an odd int
196 * yisint = 2 ... y is an even int
198 yisint = 0;
199 if (hx < 0)
201 if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
202 yisint = 2; /* even integer y */
203 else if (iy >= 0x3ff00000) /* 1.0 */
205 if (__floorl (y) == y)
207 z = 0.5 * y;
208 if (__floorl (z) == z)
209 yisint = 2;
210 else
211 yisint = 1;
216 /* special value of y */
217 if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
219 if (iy == 0x7ff00000 && q.parts32.w1 == 0) /* y is +-inf */
221 if (((ix - 0x3ff00000) | p.parts32.w1
222 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
223 return y - y; /* inf**+-1 is NaN */
224 else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
225 /* (|x|>1)**+-inf = inf,0 */
226 return (hy >= 0) ? y : zero;
227 else
228 /* (|x|<1)**-,+inf = inf,0 */
229 return (hy < 0) ? -y : zero;
231 if (iy == 0x3ff00000)
232 { /* y is +-1 */
233 if (hy < 0)
234 return one / x;
235 else
236 return x;
238 if (hy == 0x40000000)
239 return x * x; /* y is 2 */
240 if (hy == 0x3fe00000)
241 { /* y is 0.5 */
242 if (hx >= 0) /* x >= +0 */
243 return __ieee754_sqrtl (x);
247 ax = fabsl (x);
248 /* special value of x */
249 if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
251 if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
253 z = ax; /*x is +-0,+-inf,+-1 */
254 if (hy < 0)
255 z = one / z; /* z = (1/|x|) */
256 if (hx < 0)
258 if (((ix - 0x3ff00000) | yisint) == 0)
260 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
262 else if (yisint == 1)
263 z = -z; /* (x<0)**odd = -(|x|**odd) */
265 return z;
269 /* (x<0)**(non-int) is NaN */
270 if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
271 return (x - x) / (x - x);
273 /* |y| is huge.
274 2^-16495 = 1/2 of smallest representable value.
275 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
276 if (iy > 0x41d654b0)
278 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
279 if (iy > 0x47d654b0)
281 if (ix <= 0x3fefffff)
282 return (hy < 0) ? huge * huge : tiny * tiny;
283 if (ix >= 0x3ff00000)
284 return (hy > 0) ? huge * huge : tiny * tiny;
286 /* over/underflow if x is not close to one */
287 if (ix < 0x3fefffff)
288 return (hy < 0) ? huge * huge : tiny * tiny;
289 if (ix > 0x3ff00000)
290 return (hy > 0) ? huge * huge : tiny * tiny;
293 n = 0;
294 /* take care subnormal number */
295 if (ix < 0x00100000)
297 ax *= two113;
298 n -= 113;
299 o.value = ax;
300 ix = o.parts32.w0;
302 n += ((ix) >> 20) - 0x3ff;
303 j = ix & 0x000fffff;
304 /* determine interval */
305 ix = j | 0x3ff00000; /* normalize ix */
306 if (j <= 0x39880)
307 k = 0; /* |x|<sqrt(3/2) */
308 else if (j < 0xbb670)
309 k = 1; /* |x|<sqrt(3) */
310 else
312 k = 0;
313 n += 1;
314 ix -= 0x00100000;
317 o.value = ax;
318 o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
319 ax = o.value;
321 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
322 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
323 v = one / (ax + bp[k]);
324 s = u * v;
325 s_h = s;
327 o.value = s_h;
328 o.parts32.w3 = 0;
329 o.parts32.w2 &= 0xffff8000;
330 s_h = o.value;
331 /* t_h=ax+bp[k] High */
332 t_h = ax + bp[k];
333 o.value = t_h;
334 o.parts32.w3 = 0;
335 o.parts32.w2 &= 0xffff8000;
336 t_h = o.value;
337 t_l = ax - (t_h - bp[k]);
338 s_l = v * ((u - s_h * t_h) - s_h * t_l);
339 /* compute log(ax) */
340 s2 = s * s;
341 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
342 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
343 r = s2 * s2 * u / v;
344 r += s_l * (s_h + s);
345 s2 = s_h * s_h;
346 t_h = 3.0 + s2 + r;
347 o.value = t_h;
348 o.parts32.w3 = 0;
349 o.parts32.w2 &= 0xffff8000;
350 t_h = o.value;
351 t_l = r - ((t_h - 3.0) - s2);
352 /* u+v = s*(1+...) */
353 u = s_h * t_h;
354 v = s_l * t_h + t_l * s;
355 /* 2/(3log2)*(s+...) */
356 p_h = u + v;
357 o.value = p_h;
358 o.parts32.w3 = 0;
359 o.parts32.w2 &= 0xffff8000;
360 p_h = o.value;
361 p_l = v - (p_h - u);
362 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
363 z_l = cp_l * p_h + p_l * cp + dp_l[k];
364 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
365 t = (long double) n;
366 t1 = (((z_h + z_l) + dp_h[k]) + t);
367 o.value = t1;
368 o.parts32.w3 = 0;
369 o.parts32.w2 &= 0xffff8000;
370 t1 = o.value;
371 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
373 /* s (sign of result -ve**odd) = -1 else = 1 */
374 s = one;
375 if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
376 s = -one; /* (-ve)**(odd int) */
378 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
379 y1 = y;
380 o.value = y1;
381 o.parts32.w3 = 0;
382 o.parts32.w2 &= 0xffff8000;
383 y1 = o.value;
384 p_l = (y - y1) * t1 + y * t2;
385 p_h = y1 * t1;
386 z = p_l + p_h;
387 o.value = z;
388 j = o.parts32.w0;
389 if (j >= 0x40d00000) /* z >= 16384 */
391 /* if z > 16384 */
392 if (((j - 0x40d00000) | o.parts32.w1
393 | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
394 return s * huge * huge; /* overflow */
395 else
397 if (p_l + ovt > z - p_h)
398 return s * huge * huge; /* overflow */
401 else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */
403 /* z < -16495 */
404 if (((j - 0xc0d01bc0) | o.parts32.w1
405 | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
406 return s * tiny * tiny; /* underflow */
407 else
409 if (p_l <= z - p_h)
410 return s * tiny * tiny; /* underflow */
413 /* compute 2**(p_h+p_l) */
414 i = j & 0x7fffffff;
415 k = (i >> 20) - 0x3ff;
416 n = 0;
417 if (i > 0x3fe00000)
418 { /* if |z| > 0.5, set n = [z+0.5] */
419 n = __floorl (z + 0.5L);
420 t = n;
421 p_h -= t;
423 t = p_l + p_h;
424 o.value = t;
425 o.parts32.w3 = 0;
426 o.parts32.w2 &= 0xffff8000;
427 t = o.value;
428 u = t * lg2_h;
429 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
430 z = u + v;
431 w = v - (z - u);
432 /* exp(z) */
433 t = z * z;
434 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
435 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
436 t1 = z - t * u / v;
437 r = (z * t1) / (t1 - two) - (w + z * w);
438 z = one - (r - z);
439 z = __scalbnl (z, n);
440 return s * z;