sysdeps/ieee754/ldbl-128/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <https://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating multi-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything except 1) ** NAN is NAN; 1 ** (quiet NAN) is 1
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is 1
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math-barriers.h>
  69 #include <math_private.h>
  70 #include <libm-alias-finite.h>
  71
  72 static const _Float128 bp[] = {
       /* Reference points for the logarithm reduction in __ieee754_powl:
          the scaled |x| is turned into s = (ax - bp[k]) / (ax + bp[k]),
          with k = 0 or 1 chosen from the leading fraction bits of |x|.  */
  73   1,
  74   L(1.5),
  75 };
  76
  77 /* High part of log_2(bp[k]): dp_h[0] = log_2(1) = 0, and dp_h[1] is the
        leading part of log_2(1.5) (dp_l[1] carries the remainder).  The entry
        is added to the log2 result using the same index k as bp[k].  */
  78 static const _Float128 dp_h[] = {
  79   0.0,
  80   L(5.8496250072115607565592654282227158546448E-1)
  81 };
  82
  83 /* Low part of log_2(1.5): dp_h[1] + dp_l[1] together approximate
        log_2(1.5) to more digits than one entry alone.  dp_l[0] is 0
        because log_2(1) needs no correction.  */
  84 static const _Float128 dp_l[] = {
  85   0.0,
  86   L(1.0579781240112554492329533686862998106046E-16)
  87 };
  88
  89 static const _Float128 zero = 0,
  90   one = 1,
  91   two = 2,
       /* 2^113: multiplies a subnormal |x| into the normal range; the
          exponent count n is lowered by 113 to compensate.  */
  92   two113 = L(1.0384593717069655257060992658440192E34),
       /* huge * huge and tiny * tiny are returned on the overflow and
          underflow paths, so the result is produced by an arithmetic
          operation rather than a literal.  */
  93   huge = L(1.0e3000),
  94   tiny = L(1.0e-3000);
  95
  96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  97    z = (x-1)/(x+1)
  98    1 <= x <= 1.25
  99    Peak relative error 2.3e-37 */
      /* LN[] holds the numerator coefficients of the rational function R,
         lowest power first: __ieee754_powl evaluates
         LN[0] + s2 * (LN[1] + ... + s2 * LN[4]) with s2 = s * s and divides
         it by the matching LD[] polynomial.  */
 100 static const _Float128 LN[] =
 101 {
 102  L(-3.0779177200290054398792536829702930623200E1),
 103   L(6.5135778082209159921251824580292116201640E1),
 104  L(-4.6312921812152436921591152809994014413540E1),
 105   L(1.2510208195629420304615674658258363295208E1),
 106  L(-9.9266909031921425609179910128531667336670E-1)
 107 };
 108 static const _Float128 LD[] =
 109 {
      /* Denominator coefficients paired with LN[], lowest power first.  The
         highest-order coefficient is 1 and is not stored: the evaluation in
         __ieee754_powl ends with (LD[4] + s2).  */
 110  L(-5.129862866715009066465422805058933131960E1),
 111   L(1.452015077564081884387441590064272782044E2),
 112  L(-1.524043275549860505277434040464085593165E2),
 113   L(7.236063513651544224319663428634139768808E1),
 114  L(-1.494198912340228235853027849917095580053E1)
 115   /* 1.0E0 */
 116 };
 117
 118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 119    0 <= x <= 0.5
 120    Peak relative error 5.7e-38  */
      /* PN[] holds the numerator coefficients of R, lowest power first:
         __ieee754_powl evaluates PN[0] + t * (PN[1] + ... + t * PN[4]) with
         t = z * z and divides it by the matching PD[] polynomial.  */
 121 static const _Float128 PN[] =
 122 {
 123   L(5.081801691915377692446852383385968225675E8),
 124   L(9.360895299872484512023336636427675327355E6),
 125   L(4.213701282274196030811629773097579432957E4),
 126   L(5.201006511142748908655720086041570288182E1),
 127   L(9.088368420359444263703202925095675982530E-3),
 128 };
 129 static const _Float128 PD[] =
 130 {
      /* Denominator coefficients paired with PN[], lowest power first.  The
         highest-order coefficient is 1 and is not stored: the evaluation in
         __ieee754_powl ends with (PD[3] + t).  */
 131   L(3.049081015149226615468111430031590411682E9),
 132   L(1.069833887183886839966085436512368982758E8),
 133   L(8.259257717868875207333991924545445705394E5),
 134   L(1.872583833284143212651746812884298360922E3),
 135   /* 1.0E0 */
 136 };
 137
 138 static const _Float128
 139   /* ln 2 */
      /* lg2 is the full-precision value; lg2_h + lg2_l is the same constant
         split into a leading part and a small correction, used when the
         reduced exponent is converted from base 2 to base e.  */
 140   lg2 = L(6.9314718055994530941723212145817656807550E-1),
 141   lg2_h = L(6.9314718055994528622676398299518041312695E-1),
 142   lg2_l = L(2.3190468138462996154948554638754786504121E-17),
      /* Margin added to p_l in the overflow test taken when y*log2|x|
         rounds to exactly 16384.  */
 143   ovt = L(8.0085662595372944372e-0017),
 144   /* 2/(3*log(2)) */
      /* cp is the full-precision value; cp_h + cp_l is the same constant
         split into a leading part and a small correction.  */
 145   cp = L(9.6179669392597560490661645400126142495110E-1),
 146   cp_h = L(9.6179669392597555432899980587535537779331E-1),
 147   cp_l = L(5.0577616648125906047157785230014751039424E-17);
 148
  149 _Float128
  150 __ieee754_powl (_Float128 x, _Float128 y)
  151 {
       /* Return x raised to the power y.

          Special operands (y == 0, |x| == 1, NaN, infinities, zeros, negative
          x) are resolved first by inspecting the raw 32-bit words of x and y.
          Otherwise log2|x| is computed as a high/low pair t1 + t2, multiplied
          by y (also split into two parts), and 2**(y * log2|x|) is rebuilt as
          2**n * exp(fraction * ln 2).  The sign computed for negative x with
          odd integer y is applied to the value that is returned.  */
  152   _Float128 z, ax, z_h, z_l, p_h, p_l;
  153   _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
  154   _Float128 s2, s_h, s_l, t_h, t_l, ay;
  155   int32_t i, j, k, yisint, n;
  156   uint32_t ix, iy;
  157   int32_t hx, hy;
  158   ieee854_long_double_shape_type o, p, q;
  159
       /* hx / hy: leading 32-bit word of x / y (sign, exponent and the top
          fraction bits, judging by the constants they are compared with);
          ix / iy: the same word with the sign bit cleared.  */
  160   p.value = x;
  161   hx = p.parts32.w0;
  162   ix = hx & 0x7fffffff;
  163
  164   q.value = y;
  165   hy = q.parts32.w0;
  166   iy = hy & 0x7fffffff;
  167
  168
  169   /* y==zero: x**0 = 1 */
       /* A signaling NaN x is excluded here and falls through to the NaN
          handling below.  */
  170   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
  171       && !issignaling (x))
  172     return one;
  173
  174   /* 1.0**y = 1; -1.0**+-Inf = 1 */
       /* These tests run before the NaN check, so 1 ** (quiet NaN) is 1.  */
  175   if (x == one && !issignaling (y))
  176     return one;
  177   if (x == -1 && iy == 0x7fff0000
  178       && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
  179     return one;
  180
  181   /* +-NaN return x+y */
  182   if ((ix > 0x7fff0000)
  183       || ((ix == 0x7fff0000)
  184           && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
  185       || (iy > 0x7fff0000)
  186       || ((iy == 0x7fff0000)
  187           && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
  188     return x + y;
  189
  190   /* determine if y is an odd int when x < 0
  191    * yisint = 0       ... y is not an integer
  192    * yisint = 1       ... y is an odd int
  193    * yisint = 2       ... y is an even int
  194    */
       /* yisint is only classified when the sign bit of x is set (this
          includes -0); for other x it stays 0 and is not consulted for the
          sign of the result.  */
  195   yisint = 0;
  196   if (hx < 0)
  197     {
  198       if (iy >= 0x40700000)     /* 2^113 */
  199         yisint = 2;             /* even integer y */
  200       else if (iy >= 0x3fff0000)        /* 1.0 */
  201         {
  202           if (floorl (y) == y)
  203             {
  204               z = 0.5 * y;
  205               if (floorl (z) == z)
  206                 yisint = 2;
  207               else
  208                 yisint = 1;
  209             }
  210         }
  211     }
  212
  213   /* special value of y */
       /* All cases below need the three lower words of y to be zero, so the
          comparisons on iy / hy alone identify the exact values inf, 1, 2
          and 0.5.  */
  214   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
  215     {
  216       if (iy == 0x7fff0000)     /* y is +-inf */
  217         {
  218           if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
  219               == 0)
  220             return y - y;       /* +-1**inf is NaN */
               /* NOTE(review): this return looks unreachable: x == 1 and
                  x == -1 with infinite y have already returned one above.  */
  221           else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
  222             return (hy >= 0) ? y : zero;
  223           else                  /* (|x|<1)**-,+inf = inf,0 */
  224             return (hy < 0) ? -y : zero;
  225         }
  226       if (iy == 0x3fff0000)
  227         {                       /* y is  +-1 */
  228           if (hy < 0)
  229             return one / x;
  230           else
  231             return x;
  232         }
  233       if (hy == 0x40000000)
  234         return x * x;           /* y is  2 */
  235       if (hy == 0x3ffe0000)
  236         {                       /* y is  0.5 */
  237           if (hx >= 0)          /* x >= +0 */
  238             return sqrtl (x);
  239         }
  240     }
  241
  242   ax = fabsl (x);
  243   /* special value of x */
  244   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
  245     {
  246       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
  247         {
  248           z = ax;               /*x is +-0,+-inf,+-1 */
  249           if (hy < 0)
  250             z = one / z;        /* z = (1/|x|) */
  251           if (hx < 0)
  252             {
                   /* The OR is zero only when |x| == 1 and y is not an
                      integer.  */
  253               if (((ix - 0x3fff0000) | yisint) == 0)
  254                 {
  255                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
  256                 }
  257               else if (yisint == 1)
  258                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
  259             }
  260           return z;
  261         }
  262     }
  263
  264   /* (x<0)**(non-int) is NaN */
       /* ((uint32_t) hx >> 31) - 1 is 0 exactly when the sign bit of x is
          set, so the OR is 0 only for negative x with yisint == 0.  */
  265   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
  266     return (x - x) / (x - x);
  267
  268   /* sgn (sign of result -ve**odd) = -1 else = 1 */
  269   sgn = one;
  270   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
  271     sgn = -one;                 /* (-ve)**(odd int) */
  272
  273   /* |y| is huge.
  274      2^-16495 = 1/2 of smallest representable value.
  275      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
  276   if (iy > 0x401d654b)
  277     {
  278       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
           /* sgn is not applied in this inner branch; for |y| this large
              yisint was set to 2 (even) above whenever x is negative, so
              sgn is +1 here.  */
  279       if (iy > 0x407d654b)
  280         {
  281           if (ix <= 0x3ffeffff)
  282             return (hy < 0) ? huge * huge : tiny * tiny;
  283           if (ix >= 0x3fff0000)
  284             return (hy > 0) ? huge * huge : tiny * tiny;
  285         }
  286       /* over/underflow if x is not close to one */
  287       if (ix < 0x3ffeffff)
  288         return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
  289       if (ix > 0x3fff0000)
  290         return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
  291     }
  292
       /* Replace a y of magnitude below 2^-128 by +-2^-128, keeping its
          sign.  NOTE(review): presumably this avoids spurious underflow in
          the products with y further down -- confirm.  */
  293   ay = y > 0 ? y : -y;
  294   if (ay < 0x1p-128)
  295     y = y < 0 ? -0x1p-128 : 0x1p-128;
  296
  297   n = 0;
  298   /* take care subnormal number */
       /* A leading word below 0x00010000 means a zero exponent field: scale
          by 2^113 and remember the adjustment in n.  */
  299   if (ix < 0x00010000)
  300     {
  301       ax *= two113;
  302       n -= 113;
  303       o.value = ax;
  304       ix = o.parts32.w0;
  305     }
       /* n accumulates the unbiased exponent of |x|; j keeps the top 16
          fraction bits, which select the reduction interval.  */
  306   n += ((ix) >> 16) - 0x3fff;
  307   j = ix & 0x0000ffff;
  308   /* determine interval */
  309   ix = j | 0x3fff0000;          /* normalize ix */
  310   if (j <= 0x3988)
  311     k = 0;                      /* |x|<sqrt(3/2) */
  312   else if (j < 0xbb67)
  313     k = 1;                      /* |x|<sqrt(3)   */
  314   else
  315     {
           /* Upper interval: halve ax (exponent field lowered by one) and
              count the factor of two in n, then reduce against bp[0].  */
  316       k = 0;
  317       n += 1;
  318       ix -= 0x00010000;
  319     }
  320
       /* Rebuild ax with the normalized leading word; the lower words of the
          fraction are left as they were.  */
  321   o.value = ax;
  322   o.parts32.w0 = ix;
  323   ax = o.value;
  324
  325   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
  326   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
  327   v = one / (ax + bp[k]);
  328   s = u * v;
  329   s_h = s;
  330
       /* Truncate s_h: w3 and the low 27 bits of w2 are cleared.  The same
          three-statement idiom is repeated below for t_h, p_h, t1, y1 and t
          to obtain "high" parts whose products carry fewer significant
          bits.  (Assumes w0..w3 run from most to least significant word --
          confirm against the union definition.)  */
  331   o.value = s_h;
  332   o.parts32.w3 = 0;
  333   o.parts32.w2 &= 0xf8000000;
  334   s_h = o.value;
  335   /* t_h=ax+bp[k] High */
  336   t_h = ax + bp[k];
  337   o.value = t_h;
  338   o.parts32.w3 = 0;
  339   o.parts32.w2 &= 0xf8000000;
  340   t_h = o.value;
  341   t_l = ax - (t_h - bp[k]);
       /* s_l is the correction such that s_h + s_l approximates u / (ax +
          bp[k]) more closely than s alone.  */
  342   s_l = v * ((u - s_h * t_h) - s_h * t_l);
  343   /* compute log(ax) */
       /* r = s2^2 * LN(s2) / LD(s2): rational approximation in s2 = s*s,
          followed by a correction term built from s_l.  */
  344   s2 = s * s;
  345   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
  346   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
  347   r = s2 * s2 * u / v;
  348   r += s_l * (s_h + s);
  349   s2 = s_h * s_h;
  350   t_h = 3.0 + s2 + r;
  351   o.value = t_h;
  352   o.parts32.w3 = 0;
  353   o.parts32.w2 &= 0xf8000000;
  354   t_h = o.value;
  355   t_l = r - ((t_h - 3.0) - s2);
  356   /* u+v = s*(1+...) */
  357   u = s_h * t_h;
  358   v = s_l * t_h + t_l * s;
  359   /* 2/(3log2)*(s+...) */
  360   p_h = u + v;
  361   o.value = p_h;
  362   o.parts32.w3 = 0;
  363   o.parts32.w2 &= 0xf8000000;
  364   p_h = o.value;
  365   p_l = v - (p_h - u);
  366   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
  367   z_l = cp_l * p_h + p_l * cp + dp_l[k];
  368   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
       /* t1 is the truncated sum and t2 the remainder, so t1 + t2 is the
          two-piece log2|x|.  */
  369   t = (_Float128) n;
  370   t1 = (((z_h + z_l) + dp_h[k]) + t);
  371   o.value = t1;
  372   o.parts32.w3 = 0;
  373   o.parts32.w2 &= 0xf8000000;
  374   t1 = o.value;
  375   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
  376
  377   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  378   y1 = y;
  379   o.value = y1;
  380   o.parts32.w3 = 0;
  381   o.parts32.w2 &= 0xf8000000;
  382   y1 = o.value;
  383   p_l = (y - y1) * t1 + y * t2;
  384   p_h = y1 * t1;
       /* z = y * log2|x|; j receives its leading word, so j is negative
          when z is negative.  */
  385   z = p_l + p_h;
  386   o.value = z;
  387   j = o.parts32.w0;
  388   if (j >= 0x400d0000) /* z >= 16384 */
  389     {
  390       /* if z > 16384 */
  391       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
  392         return sgn * huge * huge;       /* overflow */
  393       else
  394         {
               /* z rounded to exactly 16384: decide from the low part.  */
  395           if (p_l + ovt > z - p_h)
  396             return sgn * huge * huge;   /* overflow */
  397         }
  398     }
       /* NOTE(review): 0x400d01b9 is the leading word of 16494.25, whereas
          0xc00d01bc used in the inner test is the leading word of -16495.
          The branch is therefore entered for z <= -16494.25 and returns the
          underflow value for every such z other than exactly -16495 --
          confirm this is intended.  */
  399   else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
  400     {
  401       /* z < -16495 */
  402       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
  403           != 0)
  404         return sgn * tiny * tiny;       /* underflow */
  405       else
  406         {
  407           if (p_l <= z - p_h)
  408             return sgn * tiny * tiny;   /* underflow */
  409         }
  410     }
  411   /* compute 2**(p_h+p_l) */
  412   i = j & 0x7fffffff;
       /* NOTE(review): the value stored in k here is never read again in
          this function (dead store).  */
  413   k = (i >> 16) - 0x3fff;
  414   n = 0;
  415   if (i > 0x3ffe0000)
  416     {                           /* if |z| > 0.5, set n = [z+0.5] */
  417       n = floorl (z + L(0.5));
  418       t = n;
  419       p_h -= t;
  420     }
       /* t: truncated fractional exponent; u + v converts it to a natural
          exponent (multiplication by ln 2 in high/low form), with w the
          rounding error of z = u + v.  */
  421   t = p_l + p_h;
  422   o.value = t;
  423   o.parts32.w3 = 0;
  424   o.parts32.w2 &= 0xf8000000;
  425   t = o.value;
  426   u = t * lg2_h;
  427   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
  428   z = u + v;
  429   w = v - (z - u);
  430   /*  exp(z) */
       /* Rational approximation with PN / PD in t = z*z, then the correction
          for w is folded in.  */
  431   t = z * z;
  432   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
  433   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
  434   t1 = z - t * u / v;
  435   r = (z * t1) / (t1 - two) - (w + z * w);
  436   z = one - (r - z);
       /* Scale by 2**n by adding n to the exponent field of the leading
          word.  NOTE(review): n can be negative here, and left-shifting a
          negative signed value is undefined in ISO C; the code relies on the
          compiler's behaviour for this shift.  */
  437   o.value = z;
  438   j = o.parts32.w0;
  439   j += (n << 16);
  440   if ((j >> 16) <= 0)
  441     {
           /* The adjusted exponent field is not positive, so the result
              cannot be built by patching the word; scale with __scalbnl
              instead and evaluate z * z (presumably so that the underflow
              exception is raised -- math_force_eval comes from
              <math-barriers.h>, confirm).  */
  442       z = __scalbnl (z, n);     /* subnormal output */
  443       _Float128 force_underflow = z * z;
  444       math_force_eval (force_underflow);
  445     }
  446   else
  447     {
  448       o.parts32.w0 = j;
  449       z = o.value;
  450     }
  451   return sgn * z;
  452 }
  453 libm_alias_finite (__ieee754_powl, __powl) /* NOTE(review): macro from <libm-alias-finite.h>; presumably exports __ieee754_powl under a finite-math alias derived from __powl -- confirm against that header.  */