sysdeps/ieee754/ldbl-128ibm/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include <math.h>
  68 #include <math_private.h>
  69
  70 static const long double bp[] = {
  71   1.0L,
  72   1.5L,
  73 };
  74
  75 /* log_2(1.5) */
  76 static const long double dp_h[] = {
  77   0.0,
  78   5.8496250072115607565592654282227158546448E-1L
  79 };
  80
  81 /* Low part of log_2(1.5) */
  82 static const long double dp_l[] = {
  83   0.0,
  84   1.0579781240112554492329533686862998106046E-16L
  85 };
  86
  87 static const long double zero = 0.0L,
  88   one = 1.0L,
  89   two = 2.0L,
  90   two113 = 1.0384593717069655257060992658440192E34L,
  91   huge = 1.0e300L,
  92   tiny = 1.0e-300L;
  93
  94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  95    z = (x-1)/(x+1)
  96    1 <= x <= 1.25
  97    Peak relative error 2.3e-37 */
  98 static const long double LN[] =
  99 {
 100  -3.0779177200290054398792536829702930623200E1L,
 101   6.5135778082209159921251824580292116201640E1L,
 102  -4.6312921812152436921591152809994014413540E1L,
 103   1.2510208195629420304615674658258363295208E1L,
 104  -9.9266909031921425609179910128531667336670E-1L
 105 };
 106 static const long double LD[] =
 107 {
 108  -5.129862866715009066465422805058933131960E1L,
 109   1.452015077564081884387441590064272782044E2L,
 110  -1.524043275549860505277434040464085593165E2L,
 111   7.236063513651544224319663428634139768808E1L,
 112  -1.494198912340228235853027849917095580053E1L
 113   /* 1.0E0 */
 114 };
 115
 116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 117    0 <= x <= 0.5
 118    Peak relative error 5.7e-38  */
 119 static const long double PN[] =
 120 {
 121   5.081801691915377692446852383385968225675E8L,
 122   9.360895299872484512023336636427675327355E6L,
 123   4.213701282274196030811629773097579432957E4L,
 124   5.201006511142748908655720086041570288182E1L,
 125   9.088368420359444263703202925095675982530E-3L,
 126 };
 127 static const long double PD[] =
 128 {
 129   3.049081015149226615468111430031590411682E9L,
 130   1.069833887183886839966085436512368982758E8L,
 131   8.259257717868875207333991924545445705394E5L,
 132   1.872583833284143212651746812884298360922E3L,
 133   /* 1.0E0 */
 134 };
 135
 136 static const long double
 137   /* ln 2 */
 138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 141   ovt = 8.0085662595372944372e-0017L,
 142   /* 2/(3*log(2)) */
 143   cp = 9.6179669392597560490661645400126142495110E-1L,
 144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 146
 147 long double
 148 __ieee754_powl (long double x, long double y)
 149 {
 150   long double z, ax, z_h, z_l, p_h, p_l;
 151   long double y1, t1, t2, r, s, sgn, t, u, v, w;
 152   long double s2, s_h, s_l, t_h, t_l, ay;
 153   int32_t i, j, k, yisint, n;
 154   uint32_t ix, iy;
 155   int32_t hx, hy, hax;
 156   double ohi, xhi, xlo, yhi, ylo;
 157   uint32_t lx, ly, lj;
 158
 159   ldbl_unpack (x, &xhi, &xlo);
 160   EXTRACT_WORDS (hx, lx, xhi);
 161   ix = hx & 0x7fffffff;
 162
 163   ldbl_unpack (y, &yhi, &ylo);
 164   EXTRACT_WORDS (hy, ly, yhi);
 165   iy = hy & 0x7fffffff;
 166
 167   /* y==zero: x**0 = 1 */
 168   if ((iy | ly) == 0 && !issignaling (x))
 169     return one;
 170
 171   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 172   if (x == one && !issignaling (y))
 173     return one;
 174   if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
 175     return one;
 176
 177   /* +-NaN return x+y */
 178   if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
 179       || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
 180     return x + y;
 181
 182   /* determine if y is an odd int when x < 0
 183    * yisint = 0       ... y is not an integer
 184    * yisint = 1       ... y is an odd int
 185    * yisint = 2       ... y is an even int
 186    */
 187   yisint = 0;
 188   if (hx < 0)
 189     {
 190       uint32_t low_ye;
 191
 192       GET_HIGH_WORD (low_ye, ylo);
 193       if ((low_ye & 0x7fffffff) >= 0x43400000)  /* Low part >= 2^53 */
 194         yisint = 2;             /* even integer y */
 195       else if (iy >= 0x3ff00000)        /* 1.0 */
 196         {
 197           if (__floorl (y) == y)
 198             {
 199               z = 0.5 * y;
 200               if (__floorl (z) == z)
 201                 yisint = 2;
 202               else
 203                 yisint = 1;
 204             }
 205         }
 206     }
 207
 208   ax = fabsl (x);
 209
 210   /* special value of y */
 211   if (ly == 0)
 212     {
 213       if (iy == 0x7ff00000)     /* y is +-inf */
 214         {
 215           if (ax > one)
 216             /* (|x|>1)**+-inf = inf,0 */
 217             return (hy >= 0) ? y : zero;
 218           else
 219             /* (|x|<1)**-,+inf = inf,0 */
 220             return (hy < 0) ? -y : zero;
 221         }
 222       if (ylo == 0.0)
 223         {
 224           if (iy == 0x3ff00000)
 225             {                   /* y is  +-1 */
 226               if (hy < 0)
 227                 return one / x;
 228               else
 229                 return x;
 230             }
 231           if (hy == 0x40000000)
 232             return x * x;               /* y is  2 */
 233           if (hy == 0x3fe00000)
 234             {                   /* y is  0.5 */
 235               if (hx >= 0)              /* x >= +0 */
 236                 return __ieee754_sqrtl (x);
 237             }
 238         }
 239     }
 240
 241   /* special value of x */
 242   if (lx == 0)
 243     {
 244       if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
 245         {
 246           z = ax;               /*x is +-0,+-inf,+-1 */
 247           if (hy < 0)
 248             z = one / z;        /* z = (1/|x|) */
 249           if (hx < 0)
 250             {
 251               if (((ix - 0x3ff00000) | yisint) == 0)
 252                 {
 253                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 254                 }
 255               else if (yisint == 1)
 256                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 257             }
 258           return z;
 259         }
 260     }
 261
 262   /* (x<0)**(non-int) is NaN */
 263   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
 264     return (x - x) / (x - x);
 265
 266   /* sgn (sign of result -ve**odd) = -1 else = 1 */
 267   sgn = one;
 268   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 269     sgn = -one;                 /* (-ve)**(odd int) */
 270
 271   /* |y| is huge.
 272      2^-16495 = 1/2 of smallest representable value.
 273      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 274   if (iy > 0x41d654b0)
 275     {
 276       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 277       if (iy > 0x47d654b0)
 278         {
 279           if (ix <= 0x3fefffff)
 280             return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
 281           if (ix >= 0x3ff00000)
 282             return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
 283         }
 284       /* over/underflow if x is not close to one */
 285       if (ix < 0x3fefffff)
 286         return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
 287       if (ix > 0x3ff00000)
 288         return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
 289     }
 290
 291   ay = y > 0 ? y : -y;
 292   if (ay < 0x1p-117)
 293     y = y < 0 ? -0x1p-117 : 0x1p-117;
 294
 295   n = 0;
 296   /* take care subnormal number */
 297   if (ix < 0x00100000)
 298     {
 299       ax *= two113;
 300       n -= 113;
 301       ohi = ldbl_high (ax);
 302       GET_HIGH_WORD (ix, ohi);
 303     }
 304   n += ((ix) >> 20) - 0x3ff;
 305   j = ix & 0x000fffff;
 306   /* determine interval */
 307   ix = j | 0x3ff00000;          /* normalize ix */
 308   if (j <= 0x39880)
 309     k = 0;                      /* |x|<sqrt(3/2) */
 310   else if (j < 0xbb670)
 311     k = 1;                      /* |x|<sqrt(3)   */
 312   else
 313     {
 314       k = 0;
 315       n += 1;
 316       ix -= 0x00100000;
 317     }
 318
 319   ohi = ldbl_high (ax);
 320   GET_HIGH_WORD (hax, ohi);
 321   ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
 322
 323   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 324   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 325   v = one / (ax + bp[k]);
 326   s = u * v;
 327   s_h = ldbl_high (s);
 328
 329   /* t_h=ax+bp[k] High */
 330   t_h = ax + bp[k];
 331   t_h = ldbl_high (t_h);
 332   t_l = ax - (t_h - bp[k]);
 333   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 334   /* compute log(ax) */
 335   s2 = s * s;
 336   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 337   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 338   r = s2 * s2 * u / v;
 339   r += s_l * (s_h + s);
 340   s2 = s_h * s_h;
 341   t_h = 3.0 + s2 + r;
 342   t_h = ldbl_high (t_h);
 343   t_l = r - ((t_h - 3.0) - s2);
 344   /* u+v = s*(1+...) */
 345   u = s_h * t_h;
 346   v = s_l * t_h + t_l * s;
 347   /* 2/(3log2)*(s+...) */
 348   p_h = u + v;
 349   p_h = ldbl_high (p_h);
 350   p_l = v - (p_h - u);
 351   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 352   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 353   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 354   t = (long double) n;
 355   t1 = (((z_h + z_l) + dp_h[k]) + t);
 356   t1 = ldbl_high (t1);
 357   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 358
 359   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 360   y1 = ldbl_high (y);
 361   p_l = (y - y1) * t1 + y * t2;
 362   p_h = y1 * t1;
 363   z = p_l + p_h;
 364   ohi = ldbl_high (z);
 365   EXTRACT_WORDS (j, lj, ohi);
 366   if (j >= 0x40d00000) /* z >= 16384 */
 367     {
 368       /* if z > 16384 */
 369       if (((j - 0x40d00000) | lj) != 0)
 370         return sgn * huge * huge;       /* overflow */
 371       else
 372         {
 373           if (p_l + ovt > z - p_h)
 374             return sgn * huge * huge;   /* overflow */
 375         }
 376     }
 377   else if ((j & 0x7fffffff) >= 0x40d01b90)      /* z <= -16495 */
 378     {
 379       /* z < -16495 */
 380       if (((j - 0xc0d01bc0) | lj) != 0)
 381         return sgn * tiny * tiny;       /* underflow */
 382       else
 383         {
 384           if (p_l <= z - p_h)
 385             return sgn * tiny * tiny;   /* underflow */
 386         }
 387     }
 388   /* compute 2**(p_h+p_l) */
 389   i = j & 0x7fffffff;
 390   k = (i >> 20) - 0x3ff;
 391   n = 0;
 392   if (i > 0x3fe00000)
 393     {                           /* if |z| > 0.5, set n = [z+0.5] */
 394       n = __floorl (z + 0.5L);
 395       t = n;
 396       p_h -= t;
 397     }
 398   t = p_l + p_h;
 399   t = ldbl_high (t);
 400   u = t * lg2_h;
 401   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 402   z = u + v;
 403   w = v - (z - u);
 404   /*  exp(z) */
 405   t = z * z;
 406   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 407   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 408   t1 = z - t * u / v;
 409   r = (z * t1) / (t1 - two) - (w + z * w);
 410   z = one - (r - z);
 411   z = __scalbnl (sgn * z, n);
 412   math_check_force_underflow (z);
 413   return z;
 414 }
 415 strong_alias (__ieee754_powl, __powl_finite)