sysdeps/ieee754/ldbl-128/e_powl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Expansions and modifications for 128-bit long double are
  13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14    and are incorporated herein by permission of the author.  The author
  15    reserves the right to distribute this material elsewhere under different
  16    copying permissions.  These modifications are distributed here under
  17    the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* __ieee754_powl(x,y) return x**y
  34  *
  35  *                    n
  36  * Method:  Let x =  2   * (1+f)
  37  *      1. Compute and return log2(x) in two pieces:
  38  *              log2(x) = w1 + w2,
  39  *         where w1 has 113-53 = 60 bit trailing zeros.
  40  *      2. Perform y*log2(x) = n+y' by simulating muti-precision
  41  *         arithmetic, where |y'|<=0.5.
  42  *      3. Return x**y = 2**n*exp(y'*log2)
  43  *
  44  * Special cases:
  45  *      1.  (anything) ** 0  is 1
  46  *      2.  (anything) ** 1  is itself
  47  *      3.  (anything) ** NAN is NAN
  48  *      4.  NAN ** (anything except 0) is NAN
  49  *      5.  +-(|x| > 1) **  +INF is +INF
  50  *      6.  +-(|x| > 1) **  -INF is +0
  51  *      7.  +-(|x| < 1) **  +INF is +0
  52  *      8.  +-(|x| < 1) **  -INF is +INF
  53  *      9.  +-1         ** +-INF is NAN
  54  *      10. +0 ** (+anything except 0, NAN)               is +0
  55  *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
  56  *      12. +0 ** (-anything except 0, NAN)               is +INF
  57  *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
  58  *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
  59  *      15. +INF ** (+anything except 0,NAN) is +INF
  60  *      16. +INF ** (-anything except 0,NAN) is +0
  61  *      17. -INF ** (anything)  = -0 ** (-anything)
  62  *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
  63  *      19. (-anything except 0 and inf) ** (non-integer) is NAN
  64  *
  65  */
  66
  67 #include "math.h"
  68 #include "math_private.h"
  69
  70 static const long double bp[] = {
  71   1.0L,
  72   1.5L,
  73 };
  74
  75 /* log_2(1.5) */
  76 static const long double dp_h[] = {
  77   0.0,
  78   5.8496250072115607565592654282227158546448E-1L
  79 };
  80
  81 /* Low part of log_2(1.5) */
  82 static const long double dp_l[] = {
  83   0.0,
  84   1.0579781240112554492329533686862998106046E-16L
  85 };
  86
  87 static const long double zero = 0.0L,
  88   one = 1.0L,
  89   two = 2.0L,
  90   two113 = 1.0384593717069655257060992658440192E34L,
  91   huge = 1.0e3000L,
  92   tiny = 1.0e-3000L;
  93
  94 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
  95    z = (x-1)/(x+1)
  96    1 <= x <= 1.25
  97    Peak relative error 2.3e-37 */
  98 static const long double LN[] =
  99 {
 100  -3.0779177200290054398792536829702930623200E1L,
 101   6.5135778082209159921251824580292116201640E1L,
 102  -4.6312921812152436921591152809994014413540E1L,
 103   1.2510208195629420304615674658258363295208E1L,
 104  -9.9266909031921425609179910128531667336670E-1L
 105 };
 106 static const long double LD[] =
 107 {
 108  -5.129862866715009066465422805058933131960E1L,
 109   1.452015077564081884387441590064272782044E2L,
 110  -1.524043275549860505277434040464085593165E2L,
 111   7.236063513651544224319663428634139768808E1L,
 112  -1.494198912340228235853027849917095580053E1L
 113   /* 1.0E0 */
 114 };
 115
 116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
 117    0 <= x <= 0.5
 118    Peak relative error 5.7e-38  */
 119 static const long double PN[] =
 120 {
 121   5.081801691915377692446852383385968225675E8L,
 122   9.360895299872484512023336636427675327355E6L,
 123   4.213701282274196030811629773097579432957E4L,
 124   5.201006511142748908655720086041570288182E1L,
 125   9.088368420359444263703202925095675982530E-3L,
 126 };
 127 static const long double PD[] =
 128 {
 129   3.049081015149226615468111430031590411682E9L,
 130   1.069833887183886839966085436512368982758E8L,
 131   8.259257717868875207333991924545445705394E5L,
 132   1.872583833284143212651746812884298360922E3L,
 133   /* 1.0E0 */
 134 };
 135
 136 static const long double
 137   /* ln 2 */
 138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
 139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
 140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
 141   ovt = 8.0085662595372944372e-0017L,
 142   /* 2/(3*log(2)) */
 143   cp = 9.6179669392597560490661645400126142495110E-1L,
 144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
 145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
 146
 147 long double
 148 __ieee754_powl (long double x, long double y)
 149 {
 150   long double z, ax, z_h, z_l, p_h, p_l;
 151   long double y1, t1, t2, r, s, t, u, v, w;
 152   long double s2, s_h, s_l, t_h, t_l;
 153   int32_t i, j, k, yisint, n;
 154   u_int32_t ix, iy;
 155   int32_t hx, hy;
 156   ieee854_long_double_shape_type o, p, q;
 157
 158   p.value = x;
 159   hx = p.parts32.w0;
 160   ix = hx & 0x7fffffff;
 161
 162   q.value = y;
 163   hy = q.parts32.w0;
 164   iy = hy & 0x7fffffff;
 165
 166
 167   /* y==zero: x**0 = 1 */
 168   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 169     return one;
 170
 171   /* 1.0**y = 1; -1.0**+-Inf = 1 */
 172   if (x == one)
 173     return one;
 174   if (x == -1.0L && iy == 0x7fff0000
 175       && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 176     return one;
 177
 178   /* +-NaN return x+y */
 179   if ((ix > 0x7fff0000)
 180       || ((ix == 0x7fff0000)
 181           && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
 182       || (iy > 0x7fff0000)
 183       || ((iy == 0x7fff0000)
 184           && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
 185     return x + y;
 186
 187   /* determine if y is an odd int when x < 0
 188    * yisint = 0       ... y is not an integer
 189    * yisint = 1       ... y is an odd int
 190    * yisint = 2       ... y is an even int
 191    */
 192   yisint = 0;
 193   if (hx < 0)
 194     {
 195       if (iy >= 0x40700000)     /* 2^113 */
 196         yisint = 2;             /* even integer y */
 197       else if (iy >= 0x3fff0000)        /* 1.0 */
 198         {
 199           if (__floorl (y) == y)
 200             {
 201               z = 0.5 * y;
 202               if (__floorl (z) == z)
 203                 yisint = 2;
 204               else
 205                 yisint = 1;
 206             }
 207         }
 208     }
 209
 210   /* special value of y */
 211   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
 212     {
 213       if (iy == 0x7fff0000)     /* y is +-inf */
 214         {
 215           if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
 216               == 0)
 217             return y - y;       /* +-1**inf is NaN */
 218           else if (ix >= 0x3fff0000)    /* (|x|>1)**+-inf = inf,0 */
 219             return (hy >= 0) ? y : zero;
 220           else                  /* (|x|<1)**-,+inf = inf,0 */
 221             return (hy < 0) ? -y : zero;
 222         }
 223       if (iy == 0x3fff0000)
 224         {                       /* y is  +-1 */
 225           if (hy < 0)
 226             return one / x;
 227           else
 228             return x;
 229         }
 230       if (hy == 0x40000000)
 231         return x * x;           /* y is  2 */
 232       if (hy == 0x3ffe0000)
 233         {                       /* y is  0.5 */
 234           if (hx >= 0)          /* x >= +0 */
 235             return __ieee754_sqrtl (x);
 236         }
 237     }
 238
 239   ax = fabsl (x);
 240   /* special value of x */
 241   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
 242     {
 243       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
 244         {
 245           z = ax;               /*x is +-0,+-inf,+-1 */
 246           if (hy < 0)
 247             z = one / z;        /* z = (1/|x|) */
 248           if (hx < 0)
 249             {
 250               if (((ix - 0x3fff0000) | yisint) == 0)
 251                 {
 252                   z = (z - z) / (z - z);        /* (-1)**non-int is NaN */
 253                 }
 254               else if (yisint == 1)
 255                 z = -z;         /* (x<0)**odd = -(|x|**odd) */
 256             }
 257           return z;
 258         }
 259     }
 260
 261   /* (x<0)**(non-int) is NaN */
 262   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
 263     return (x - x) / (x - x);
 264
 265   /* |y| is huge.
 266      2^-16495 = 1/2 of smallest representable value.
 267      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
 268   if (iy > 0x401d654b)
 269     {
 270       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
 271       if (iy > 0x407d654b)
 272         {
 273           if (ix <= 0x3ffeffff)
 274             return (hy < 0) ? huge * huge : tiny * tiny;
 275           if (ix >= 0x3fff0000)
 276             return (hy > 0) ? huge * huge : tiny * tiny;
 277         }
 278       /* over/underflow if x is not close to one */
 279       if (ix < 0x3ffeffff)
 280         return (hy < 0) ? huge * huge : tiny * tiny;
 281       if (ix > 0x3fff0000)
 282         return (hy > 0) ? huge * huge : tiny * tiny;
 283     }
 284
 285   n = 0;
 286   /* take care subnormal number */
 287   if (ix < 0x00010000)
 288     {
 289       ax *= two113;
 290       n -= 113;
 291       o.value = ax;
 292       ix = o.parts32.w0;
 293     }
 294   n += ((ix) >> 16) - 0x3fff;
 295   j = ix & 0x0000ffff;
 296   /* determine interval */
 297   ix = j | 0x3fff0000;          /* normalize ix */
 298   if (j <= 0x3988)
 299     k = 0;                      /* |x|<sqrt(3/2) */
 300   else if (j < 0xbb67)
 301     k = 1;                      /* |x|<sqrt(3)   */
 302   else
 303     {
 304       k = 0;
 305       n += 1;
 306       ix -= 0x00010000;
 307     }
 308
 309   o.value = ax;
 310   o.parts32.w0 = ix;
 311   ax = o.value;
 312
 313   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 314   u = ax - bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
 315   v = one / (ax + bp[k]);
 316   s = u * v;
 317   s_h = s;
 318
 319   o.value = s_h;
 320   o.parts32.w3 = 0;
 321   o.parts32.w2 &= 0xf8000000;
 322   s_h = o.value;
 323   /* t_h=ax+bp[k] High */
 324   t_h = ax + bp[k];
 325   o.value = t_h;
 326   o.parts32.w3 = 0;
 327   o.parts32.w2 &= 0xf8000000;
 328   t_h = o.value;
 329   t_l = ax - (t_h - bp[k]);
 330   s_l = v * ((u - s_h * t_h) - s_h * t_l);
 331   /* compute log(ax) */
 332   s2 = s * s;
 333   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
 334   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
 335   r = s2 * s2 * u / v;
 336   r += s_l * (s_h + s);
 337   s2 = s_h * s_h;
 338   t_h = 3.0 + s2 + r;
 339   o.value = t_h;
 340   o.parts32.w3 = 0;
 341   o.parts32.w2 &= 0xf8000000;
 342   t_h = o.value;
 343   t_l = r - ((t_h - 3.0) - s2);
 344   /* u+v = s*(1+...) */
 345   u = s_h * t_h;
 346   v = s_l * t_h + t_l * s;
 347   /* 2/(3log2)*(s+...) */
 348   p_h = u + v;
 349   o.value = p_h;
 350   o.parts32.w3 = 0;
 351   o.parts32.w2 &= 0xf8000000;
 352   p_h = o.value;
 353   p_l = v - (p_h - u);
 354   z_h = cp_h * p_h;             /* cp_h+cp_l = 2/(3*log2) */
 355   z_l = cp_l * p_h + p_l * cp + dp_l[k];
 356   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 357   t = (long double) n;
 358   t1 = (((z_h + z_l) + dp_h[k]) + t);
 359   o.value = t1;
 360   o.parts32.w3 = 0;
 361   o.parts32.w2 &= 0xf8000000;
 362   t1 = o.value;
 363   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
 364
 365   /* s (sign of result -ve**odd) = -1 else = 1 */
 366   s = one;
 367   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
 368     s = -one;                   /* (-ve)**(odd int) */
 369
 370   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
 371   y1 = y;
 372   o.value = y1;
 373   o.parts32.w3 = 0;
 374   o.parts32.w2 &= 0xf8000000;
 375   y1 = o.value;
 376   p_l = (y - y1) * t1 + y * t2;
 377   p_h = y1 * t1;
 378   z = p_l + p_h;
 379   o.value = z;
 380   j = o.parts32.w0;
 381   if (j >= 0x400d0000) /* z >= 16384 */
 382     {
 383       /* if z > 16384 */
 384       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
 385         return s * huge * huge; /* overflow */
 386       else
 387         {
 388           if (p_l + ovt > z - p_h)
 389             return s * huge * huge;     /* overflow */
 390         }
 391     }
 392   else if ((j & 0x7fffffff) >= 0x400d01b9)      /* z <= -16495 */
 393     {
 394       /* z < -16495 */
 395       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
 396           != 0)
 397         return s * tiny * tiny; /* underflow */
 398       else
 399         {
 400           if (p_l <= z - p_h)
 401             return s * tiny * tiny;     /* underflow */
 402         }
 403     }
 404   /* compute 2**(p_h+p_l) */
 405   i = j & 0x7fffffff;
 406   k = (i >> 16) - 0x3fff;
 407   n = 0;
 408   if (i > 0x3ffe0000)
 409     {                           /* if |z| > 0.5, set n = [z+0.5] */
 410       n = __floorl (z + 0.5L);
 411       t = n;
 412       p_h -= t;
 413     }
 414   t = p_l + p_h;
 415   o.value = t;
 416   o.parts32.w3 = 0;
 417   o.parts32.w2 &= 0xf8000000;
 418   t = o.value;
 419   u = t * lg2_h;
 420   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
 421   z = u + v;
 422   w = v - (z - u);
 423   /*  exp(z) */
 424   t = z * z;
 425   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
 426   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
 427   t1 = z - t * u / v;
 428   r = (z * t1) / (t1 - two) - (w + z * w);
 429   z = one - (r - z);
 430   o.value = z;
 431   j = o.parts32.w0;
 432   j += (n << 16);
 433   if ((j >> 16) <= 0)
 434     z = __scalbnl (z, n);       /* subnormal output */
 435   else
 436     {
 437       o.parts32.w0 = j;
 438       z = o.value;
 439     }
 440   return s * z;
 441 }
 442 strong_alias (__ieee754_powl, __powl_finite)