sysdeps/ieee754/ldbl-128/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, write to the Free Software
  31     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
   40  *      2. Perform y*log2(x) = n+y' by simulating multi-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
   47  *      3.  (anything except 1) ** NAN is NAN; 1 ** NAN is 1
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
   53  *      9.  +-1         ** +-INF is 1
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include "math.h"
  68 #include "math_private.h"
  69
  70 static const long double bp[] = {
  71   1.0L,
  72   1.5L,
  73 };
  74
  75 /* log_2(1.5) */
  76 static const long double dp_h[] = {
  77   0.0,
  78   5.8496250072115607565592654282227158546448E-1L
  79 };
  80
  81 /* Low part of log_2(1.5) */
  82 static const long double dp_l[] = {
  83   0.0,
  84   1.0579781240112554492329533686862998106046E-16L
  85 };
  86
  87 static const long double zero = 0.0L,
  88   one = 1.0L,
  89   two = 2.0L,
  90   two113 = 1.0384593717069655257060992658440192E34L,
  91   huge = 1.0e3000L,
  92   tiny = 1.0e-3000L;
  93
  94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  95    z = (x-1)/(x+1)
  96    1 <= x <= 1.25
  97    Peak relative error 2.3e-37 */
  98 static const long double LN[] =
  99 {
 100  -3.0779177200290054398792536829702930623200E1L,
 101   6.5135778082209159921251824580292116201640E1L,
 102  -4.6312921812152436921591152809994014413540E1L,
 103   1.2510208195629420304615674658258363295208E1L,
 104  -9.9266909031921425609179910128531667336670E-1L
 105 };
 106 static const long double LD[] =
 107 {
 108  -5.129862866715009066465422805058933131960E1L,
 109   1.452015077564081884387441590064272782044E2L,
 110  -1.524043275549860505277434040464085593165E2L,
 111   7.236063513651544224319663428634139768808E1L,
 112  -1.494198912340228235853027849917095580053E1L
 113   /* 1.0E0 */
 114 };
 115
 116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 117    0 <= x <= 0.5
 118    Peak relative error 5.7e-38  */
 119 static const long double PN[] =
 120 {
 121   5.081801691915377692446852383385968225675E8L,
 122   9.360895299872484512023336636427675327355E6L,
 123   4.213701282274196030811629773097579432957E4L,
 124   5.201006511142748908655720086041570288182E1L,
 125   9.088368420359444263703202925095675982530E-3L,
 126 };
 127 static const long double PD[] =
 128 {
 129   3.049081015149226615468111430031590411682E9L,
 130   1.069833887183886839966085436512368982758E8L,
 131   8.259257717868875207333991924545445705394E5L,
 132   1.872583833284143212651746812884298360922E3L,
 133   /* 1.0E0 */
 134 };
 135
 136 static const long double
 137   /* ln 2 */
 138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 141   ovt = 8.0085662595372944372e-0017L,
 142   /* 2/(3*log(2)) */
 143   cp = 9.6179669392597560490661645400126142495110E-1L,
 144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 146
 147 #ifdef __STDC__
 148 long double
 149 __ieee754_powl (long double x, long double y)
 150 #else
 151 long double
 152 __ieee754_powl (x, y)
 153      long double x, y;
 154 #endif
 155 {
 156   long double z, ax, z_h, z_l, p_h, p_l;
 157   long double y1, t1, t2, r, s, t, u, v, w;
 158   long double s2, s_h, s_l, t_h, t_l;
 159   int32_t i, j, k, yisint, n;
 160   u_int32_t ix, iy;
 161   int32_t hx, hy;
 162   ieee854_long_double_shape_type o, p, q;
 163
 164   p.value = x;
 165   hx = p.parts32.w0;
 166   ix = hx & 0x7fffffff;
 167
 168   q.value = y;
 169   hy = q.parts32.w0;
 170   iy = hy & 0x7fffffff;
 171
 172
 173   /* y==zero: x**0 = 1 */
 174   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 175     return one;
 176
 177   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 178   if (x == one)
 179     return one;
 180   if (x == -1.0L && iy == 0x7fff0000
 181       && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 182     return one;
 183
 184   /* +-NaN return x+y */
 185   if ((ix > 0x7fff0000)
 186       || ((ix == 0x7fff0000)
 187           && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
 188       || (iy > 0x7fff0000)
 189       || ((iy == 0x7fff0000)
 190           && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
 191     return x + y;
 192
 193   /* determine if y is an odd int when x < 0
 194    * yisint = 0       ... y is not an integer
 195    * yisint = 1       ... y is an odd int
 196    * yisint = 2       ... y is an even int
 197    */
 198   yisint = 0;
 199   if (hx < 0)
 200     {
 201       if (iy >= 0x40700000)     /* 2^113 */
 202         yisint = 2;             /* even integer y */
 203       else if (iy >= 0x3fff0000)        /* 1.0 */
 204         {
 205           if (__floorl (y) == y)
 206             {
 207               z = 0.5 * y;
 208               if (__floorl (z) == z)
 209                 yisint = 2;
 210               else
 211                 yisint = 1;
 212             }
 213         }
 214     }
 215
 216   /* special value of y */
 217   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 218     {
 219       if (iy == 0x7fff0000)     /* y is +-inf */
 220         {
 221           if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
 222               == 0)
 223             return y - y;       /* +-1**inf is NaN */
 224           else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
 225             return (hy >= 0) ? y : zero;
 226           else                  /* (|x|<1)**-,+inf = inf,0 */
 227             return (hy < 0) ? -y : zero;
 228         }
 229       if (iy == 0x3fff0000)
 230         {                       /* y is  +-1 */
 231           if (hy < 0)
 232             return one / x;
 233           else
 234             return x;
 235         }
 236       if (hy == 0x40000000)
 237         return x * x;           /* y is  2 */
 238       if (hy == 0x3ffe0000)
 239         {                       /* y is  0.5 */
 240           if (hx >= 0)          /* x >= +0 */
 241             return __ieee754_sqrtl (x);
 242         }
 243     }
 244
 245   ax = fabsl (x);
 246   /* special value of x */
 247   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
 248     {
 249       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
 250         {
 251           z = ax;               /*x is +-0,+-inf,+-1 */
 252           if (hy < 0)
 253             z = one / z;        /* z = (1/|x|) */
 254           if (hx < 0)
 255             {
 256               if (((ix - 0x3fff0000) | yisint) == 0)
 257                 {
 258                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 259                 }
 260               else if (yisint == 1)
 261                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 262             }
 263           return z;
 264         }
 265     }
 266
 267   /* (x<0)**(non-int) is NaN */
 268   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
 269     return (x - x) / (x - x);
 270
 271   /* |y| is huge.
 272      2^-16495 = 1/2 of smallest representable value.
 273      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 274   if (iy > 0x401d654b)
 275     {
 276       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 277       if (iy > 0x407d654b)
 278         {
 279           if (ix <= 0x3ffeffff)
 280             return (hy < 0) ? huge * huge : tiny * tiny;
 281           if (ix >= 0x3fff0000)
 282             return (hy > 0) ? huge * huge : tiny * tiny;
 283         }
 284       /* over/underflow if x is not close to one */
 285       if (ix < 0x3ffeffff)
 286         return (hy < 0) ? huge * huge : tiny * tiny;
 287       if (ix > 0x3fff0000)
 288         return (hy > 0) ? huge * huge : tiny * tiny;
 289     }
 290
 291   n = 0;
 292   /* take care subnormal number */
 293   if (ix < 0x00010000)
 294     {
 295       ax *= two113;
 296       n -= 113;
 297       o.value = ax;
 298       ix = o.parts32.w0;
 299     }
 300   n += ((ix) >> 16) - 0x3fff;
 301   j = ix & 0x0000ffff;
 302   /* determine interval */
 303   ix = j | 0x3fff0000;          /* normalize ix */
 304   if (j <= 0x3988)
 305     k = 0;                      /* |x|<sqrt(3/2) */
 306   else if (j < 0xbb67)
 307     k = 1;                      /* |x|<sqrt(3)   */
 308   else
 309     {
 310       k = 0;
 311       n += 1;
 312       ix -= 0x00010000;
 313     }
 314
 315   o.value = ax;
 316   o.parts32.w0 = ix;
 317   ax = o.value;
 318
 319   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 320   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 321   v = one / (ax + bp[k]);
 322   s = u * v;
 323   s_h = s;
 324
 325   o.value = s_h;
 326   o.parts32.w3 = 0;
 327   o.parts32.w2 &= 0xf8000000;
 328   s_h = o.value;
 329   /* t_h=ax+bp[k] High */
 330   t_h = ax + bp[k];
 331   o.value = t_h;
 332   o.parts32.w3 = 0;
 333   o.parts32.w2 &= 0xf8000000;
 334   t_h = o.value;
 335   t_l = ax - (t_h - bp[k]);
 336   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 337   /* compute log(ax) */
 338   s2 = s * s;
 339   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 340   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 341   r = s2 * s2 * u / v;
 342   r += s_l * (s_h + s);
 343   s2 = s_h * s_h;
 344   t_h = 3.0 + s2 + r;
 345   o.value = t_h;
 346   o.parts32.w3 = 0;
 347   o.parts32.w2 &= 0xf8000000;
 348   t_h = o.value;
 349   t_l = r - ((t_h - 3.0) - s2);
 350   /* u+v = s*(1+...) */
 351   u = s_h * t_h;
 352   v = s_l * t_h + t_l * s;
 353   /* 2/(3log2)*(s+...) */
 354   p_h = u + v;
 355   o.value = p_h;
 356   o.parts32.w3 = 0;
 357   o.parts32.w2 &= 0xf8000000;
 358   p_h = o.value;
 359   p_l = v - (p_h - u);
 360   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 361   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 362   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 363   t = (long double) n;
 364   t1 = (((z_h + z_l) + dp_h[k]) + t);
 365   o.value = t1;
 366   o.parts32.w3 = 0;
 367   o.parts32.w2 &= 0xf8000000;
 368   t1 = o.value;
 369   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 370
 371   /* s (sign of result -ve**odd) = -1 else = 1 */
 372   s = one;
 373   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 374     s = -one;                   /* (-ve)**(odd int) */
 375
 376   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 377   y1 = y;
 378   o.value = y1;
 379   o.parts32.w3 = 0;
 380   o.parts32.w2 &= 0xf8000000;
 381   y1 = o.value;
 382   p_l = (y - y1) * t1 + y * t2;
 383   p_h = y1 * t1;
 384   z = p_l + p_h;
 385   o.value = z;
 386   j = o.parts32.w0;
 387   if (j >= 0x400d0000) /* z >= 16384 */
 388     {
 389       /* if z > 16384 */
 390       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
 391         return s * huge * huge; /* overflow */
 392       else
 393         {
 394           if (p_l + ovt > z - p_h)
 395             return s * huge * huge;     /* overflow */
 396         }
 397     }
 398   else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
 399     {
 400       /* z < -16495 */
 401       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
 402           != 0)
 403         return s * tiny * tiny; /* underflow */
 404       else
 405         {
 406           if (p_l <= z - p_h)
 407             return s * tiny * tiny;     /* underflow */
 408         }
 409     }
 410   /* compute 2**(p_h+p_l) */
 411   i = j & 0x7fffffff;
 412   k = (i >> 16) - 0x3fff;
 413   n = 0;
 414   if (i > 0x3ffe0000)
 415     {                           /* if |z| > 0.5, set n = [z+0.5] */
 416       n = __floorl (z + 0.5L);
 417       t = n;
 418       p_h -= t;
 419     }
 420   t = p_l + p_h;
 421   o.value = t;
 422   o.parts32.w3 = 0;
 423   o.parts32.w2 &= 0xf8000000;
 424   t = o.value;
 425   u = t * lg2_h;
 426   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 427   z = u + v;
 428   w = v - (z - u);
 429   /*  exp(z) */
 430   t = z * z;
 431   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 432   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 433   t1 = z - t * u / v;
 434   r = (z * t1) / (t1 - two) - (w + z * w);
 435   z = one - (r - z);
 436   o.value = z;
 437   j = o.parts32.w0;
 438   j += (n << 16);
 439   if ((j >> 16) <= 0)
 440     z = __scalbnl (z, n);       /* subnormal output */
 441   else
 442     {
 443       o.parts32.w0 = j;
 444       z = o.value;
 445     }
 446   return s * z;
 447 }