sysdeps/ieee754/ldbl-128ibm/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, write to the Free Software
  31     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include "math.h"
  68 #include "math_private.h"
  69
  70 static const long double bp[] = {
  71   1.0L,
  72   1.5L,
  73 };
  74
  75 /* log_2(1.5) */
  76 static const long double dp_h[] = {
  77   0.0,
  78   5.8496250072115607565592654282227158546448E-1L
  79 };
  80
  81 /* Low part of log_2(1.5) */
  82 static const long double dp_l[] = {
  83   0.0,
  84   1.0579781240112554492329533686862998106046E-16L
  85 };
  86
  87 static const long double zero = 0.0L,
  88   one = 1.0L,
  89   two = 2.0L,
  90   two113 = 1.0384593717069655257060992658440192E34L,
  91   huge = 1.0e3000L,
  92   tiny = 1.0e-3000L;
  93
  94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  95    z = (x-1)/(x+1)
  96    1 <= x <= 1.25
  97    Peak relative error 2.3e-37 */
  98 static const long double LN[] =
  99 {
 100  -3.0779177200290054398792536829702930623200E1L,
 101   6.5135778082209159921251824580292116201640E1L,
 102  -4.6312921812152436921591152809994014413540E1L,
 103   1.2510208195629420304615674658258363295208E1L,
 104  -9.9266909031921425609179910128531667336670E-1L
 105 };
 106 static const long double LD[] =
 107 {
 108  -5.129862866715009066465422805058933131960E1L,
 109   1.452015077564081884387441590064272782044E2L,
 110  -1.524043275549860505277434040464085593165E2L,
 111   7.236063513651544224319663428634139768808E1L,
 112  -1.494198912340228235853027849917095580053E1L
 113   /* 1.0E0 */
 114 };
 115
 116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 117    0 <= x <= 0.5
 118    Peak relative error 5.7e-38  */
 119 static const long double PN[] =
 120 {
 121   5.081801691915377692446852383385968225675E8L,
 122   9.360895299872484512023336636427675327355E6L,
 123   4.213701282274196030811629773097579432957E4L,
 124   5.201006511142748908655720086041570288182E1L,
 125   9.088368420359444263703202925095675982530E-3L,
 126 };
 127 static const long double PD[] =
 128 {
 129   3.049081015149226615468111430031590411682E9L,
 130   1.069833887183886839966085436512368982758E8L,
 131   8.259257717868875207333991924545445705394E5L,
 132   1.872583833284143212651746812884298360922E3L,
 133   /* 1.0E0 */
 134 };
 135
 136 static const long double
 137   /* ln 2 */
 138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 141   ovt = 8.0085662595372944372e-0017L,
 142   /* 2/(3*log(2)) */
 143   cp = 9.6179669392597560490661645400126142495110E-1L,
 144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 146
 147 #ifdef __STDC__
 148 long double
 149 __ieee754_powl (long double x, long double y)
 150 #else
 151 long double
 152 __ieee754_powl (x, y)
 153      long double x, y;
 154 #endif
 155 {
 156   long double z, ax, z_h, z_l, p_h, p_l;
 157   long double y1, t1, t2, r, s, t, u, v, w;
 158   long double s2, s_h, s_l, t_h, t_l;
 159   int32_t i, j, k, yisint, n;
 160   u_int32_t ix, iy;
 161   int32_t hx, hy;
 162   ieee854_long_double_shape_type o, p, q;
 163
 164   p.value = x;
 165   hx = p.parts32.w0;
 166   ix = hx & 0x7fffffff;
 167
 168   q.value = y;
 169   hy = q.parts32.w0;
 170   iy = hy & 0x7fffffff;
 171
 172
 173   /* y==zero: x**0 = 1 */
 174   if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
 175     return one;
 176
 177   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 178   if (x == one)
 179     return one;
 180   if (x == -1.0L && iy == 0x7ff00000
 181       && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
 182     return one;
 183
 184   /* +-NaN return x+y */
 185   if ((ix > 0x7ff00000)
 186       || ((ix == 0x7ff00000)
 187           && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
 188       || (iy > 0x7ff00000)
 189       || ((iy == 0x7ff00000)
 190           && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
 191     return x + y;
 192
 193   /* determine if y is an odd int when x < 0
 194    * yisint = 0       ... y is not an integer
 195    * yisint = 1       ... y is an odd int
 196    * yisint = 2       ... y is an even int
 197    */
 198   yisint = 0;
 199   if (hx < 0)
 200     {
 201       if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000)    /* Low part >= 2^53 */
 202         yisint = 2;             /* even integer y */
 203       else if (iy >= 0x3ff00000)        /* 1.0 */
 204         {
 205           if (__floorl (y) == y)
 206             {
 207               z = 0.5 * y;
 208               if (__floorl (z) == z)
 209                 yisint = 2;
 210               else
 211                 yisint = 1;
 212             }
 213         }
 214     }
 215
 216   /* special value of y */
 217   if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
 218     {
 219       if (iy == 0x7ff00000 && q.parts32.w1 == 0)        /* y is +-inf */
 220         {
 221           if (((ix - 0x3ff00000) | p.parts32.w1
 222                | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
 223             return y - y;       /* inf**+-1 is NaN */
 224           else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
 225             /* (|x|>1)**+-inf = inf,0 */
 226             return (hy >= 0) ? y : zero;
 227           else
 228             /* (|x|<1)**-,+inf = inf,0 */
 229             return (hy < 0) ? -y : zero;
 230         }
 231       if (iy == 0x3ff00000)
 232         {                       /* y is  +-1 */
 233           if (hy < 0)
 234             return one / x;
 235           else
 236             return x;
 237         }
 238       if (hy == 0x40000000)
 239         return x * x;           /* y is  2 */
 240       if (hy == 0x3fe00000)
 241         {                       /* y is  0.5 */
 242           if (hx >= 0)          /* x >= +0 */
 243             return __ieee754_sqrtl (x);
 244         }
 245     }
 246
 247   ax = fabsl (x);
 248   /* special value of x */
 249   if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
 250     {
 251       if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
 252         {
 253           z = ax;               /*x is +-0,+-inf,+-1 */
 254           if (hy < 0)
 255             z = one / z;        /* z = (1/|x|) */
 256           if (hx < 0)
 257             {
 258               if (((ix - 0x3ff00000) | yisint) == 0)
 259                 {
 260                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 261                 }
 262               else if (yisint == 1)
 263                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 264             }
 265           return z;
 266         }
 267     }
 268
 269   /* (x<0)**(non-int) is NaN */
 270   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
 271     return (x - x) / (x - x);
 272
 273   /* |y| is huge.
 274      2^-16495 = 1/2 of smallest representable value.
 275      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 276   if (iy > 0x41d654b0)
 277     {
 278       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 279       if (iy > 0x47d654b0)
 280         {
 281           if (ix <= 0x3fefffff)
 282             return (hy < 0) ? huge * huge : tiny * tiny;
 283           if (ix >= 0x3ff00000)
 284             return (hy > 0) ? huge * huge : tiny * tiny;
 285         }
 286       /* over/underflow if x is not close to one */
 287       if (ix < 0x3fefffff)
 288         return (hy < 0) ? huge * huge : tiny * tiny;
 289       if (ix > 0x3ff00000)
 290         return (hy > 0) ? huge * huge : tiny * tiny;
 291     }
 292
 293   n = 0;
 294   /* take care subnormal number */
 295   if (ix < 0x00100000)
 296     {
 297       ax *= two113;
 298       n -= 113;
 299       o.value = ax;
 300       ix = o.parts32.w0;
 301     }
 302   n += ((ix) >> 20) - 0x3ff;
 303   j = ix & 0x000fffff;
 304   /* determine interval */
 305   ix = j | 0x3ff00000;          /* normalize ix */
 306   if (j <= 0x39880)
 307     k = 0;                      /* |x|<sqrt(3/2) */
 308   else if (j < 0xbb670)
 309     k = 1;                      /* |x|<sqrt(3)   */
 310   else
 311     {
 312       k = 0;
 313       n += 1;
 314       ix -= 0x00100000;
 315     }
 316
 317   o.value = ax;
 318   o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
 319   ax = o.value;
 320
 321   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 322   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 323   v = one / (ax + bp[k]);
 324   s = u * v;
 325   s_h = s;
 326
 327   o.value = s_h;
 328   o.parts32.w3 = 0;
 329   o.parts32.w2 &= 0xffff8000;
 330   s_h = o.value;
 331   /* t_h=ax+bp[k] High */
 332   t_h = ax + bp[k];
 333   o.value = t_h;
 334   o.parts32.w3 = 0;
 335   o.parts32.w2 &= 0xffff8000;
 336   t_h = o.value;
 337   t_l = ax - (t_h - bp[k]);
 338   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 339   /* compute log(ax) */
 340   s2 = s * s;
 341   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 342   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 343   r = s2 * s2 * u / v;
 344   r += s_l * (s_h + s);
 345   s2 = s_h * s_h;
 346   t_h = 3.0 + s2 + r;
 347   o.value = t_h;
 348   o.parts32.w3 = 0;
 349   o.parts32.w2 &= 0xffff8000;
 350   t_h = o.value;
 351   t_l = r - ((t_h - 3.0) - s2);
 352   /* u+v = s*(1+...) */
 353   u = s_h * t_h;
 354   v = s_l * t_h + t_l * s;
 355   /* 2/(3log2)*(s+...) */
 356   p_h = u + v;
 357   o.value = p_h;
 358   o.parts32.w3 = 0;
 359   o.parts32.w2 &= 0xffff8000;
 360   p_h = o.value;
 361   p_l = v - (p_h - u);
 362   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 363   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 364   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 365   t = (long double) n;
 366   t1 = (((z_h + z_l) + dp_h[k]) + t);
 367   o.value = t1;
 368   o.parts32.w3 = 0;
 369   o.parts32.w2 &= 0xffff8000;
 370   t1 = o.value;
 371   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 372
 373   /* s (sign of result -ve**odd) = -1 else = 1 */
 374   s = one;
 375   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 376     s = -one;                   /* (-ve)**(odd int) */
 377
 378   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 379   y1 = y;
 380   o.value = y1;
 381   o.parts32.w3 = 0;
 382   o.parts32.w2 &= 0xffff8000;
 383   y1 = o.value;
 384   p_l = (y - y1) * t1 + y * t2;
 385   p_h = y1 * t1;
 386   z = p_l + p_h;
 387   o.value = z;
 388   j = o.parts32.w0;
 389   if (j >= 0x40d00000) /* z >= 16384 */
 390     {
 391       /* if z > 16384 */
 392       if (((j - 0x40d00000) | o.parts32.w1
 393         | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
 394         return s * huge * huge; /* overflow */
 395       else
 396         {
 397           if (p_l + ovt > z - p_h)
 398             return s * huge * huge;     /* overflow */
 399         }
 400     }
 401   else if ((j & 0x7fffffff) >= 0x40d01b90)      /* z <= -16495 */
 402     {
 403       /* z < -16495 */
 404       if (((j - 0xc0d01bc0) | o.parts32.w1
 405          | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
 406         return s * tiny * tiny; /* underflow */
 407       else
 408         {
 409           if (p_l <= z - p_h)
 410             return s * tiny * tiny;     /* underflow */
 411         }
 412     }
 413   /* compute 2**(p_h+p_l) */
 414   i = j & 0x7fffffff;
 415   k = (i >> 20) - 0x3ff;
 416   n = 0;
 417   if (i > 0x3fe00000)
 418     {                           /* if |z| > 0.5, set n = [z+0.5] */
 419       n = __floorl (z + 0.5L);
 420       t = n;
 421       p_h -= t;
 422     }
 423   t = p_l + p_h;
 424   o.value = t;
 425   o.parts32.w3 = 0;
 426   o.parts32.w2 &= 0xffff8000;
 427   t = o.value;
 428   u = t * lg2_h;
 429   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 430   z = u + v;
 431   w = v - (z - u);
 432   /*  exp(z) */
 433   t = z * z;
 434   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 435   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 436   t1 = z - t * u / v;
 437   r = (z * t1) / (t1 - two) - (w + z * w);
 438   z = one - (r - z);
 439   z = __scalbnl (z, n);
 440   return s * z;
 441 }