sysdeps/x86/fpu/powl_helper.c

   1 /* Implement powl for x86 using extra-precision log.
   2    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <math.h>
  20 #include <math_private.h>
  21
  22 /* High parts and low parts of -log (k/16), for integer k from 12 to
  23    24.  */
  24
  25 static const long double powl_log_table[] =
  26   {
  27     0x4.9a58844d36e49e1p-4L, -0x1.0522624fd558f574p-68L,
  28     0x3.527da7915b3c6de4p-4L, 0x1.7d4ef4b901b99b9ep-68L,
  29     0x2.22f1d044fc8f7bc8p-4L, -0x1.8e97c071a42fc388p-68L,
  30     0x1.08598b59e3a0688ap-4L, 0x3.fd9bf503372c12fcp-72L,
  31     -0x0p+0L, 0x0p+0L,
  32     -0xf.85186008b15330cp-8L, 0x1.9b47488a6687672cp-72L,
  33     -0x1.e27076e2af2e5e9ep-4L, -0xa.87ffe1fe9e155dcp-72L,
  34     -0x2.bfe60e14f27a791p-4L, 0x1.83bebf1bdb88a032p-68L,
  35     -0x3.91fef8f353443584p-4L, -0xb.b03de5ff734495cp-72L,
  36     -0x4.59d72aeae98380e8p-4L, 0xc.e0aa3be4747dc1p-72L,
  37     -0x5.1862f08717b09f4p-4L, -0x2.decdeccf1cd10578p-68L,
  38     -0x5.ce75fdaef401a738p-4L, -0x9.314feb4fbde5aaep-72L,
  39     -0x6.7cc8fb2fe612fcbp-4L, 0x2.5ca2642feb779f98p-68L,
  40   };
  41
  42 /* High 32 bits of log2 (e), and remainder rounded to 64 bits.  */
  43 static const long double log2e_hi = 0x1.71547652p+0L;
  44 static const long double log2e_lo = 0xb.82fe1777d0ffda1p-36L;
  45
  46 /* Given a number with high part HI and low part LO, add the number X
  47    to it and store the result in *RHI and *RLO.  It is given that
  48    either |X| < |0.7 * HI|, or HI == LO == 0, and that the values are
  49    small enough that no overflow occurs.  The result does not need to
  50    be exact to 128 bits; 78-bit accuracy of the final accumulated
  51    result suffices.  */
  52
  53 static inline void
  54 acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
  55            long double x)
  56 {
  57   long double thi = hi + x;
  58   long double tlo = (hi - thi) + x + lo;
  59   *rhi = thi + tlo;
  60   *rlo = (thi - *rhi) + tlo;
  61 }
  62
  63 extern long double __powl_helper (long double x, long double y);
  64 libm_hidden_proto (__powl_helper)
  65
  66 /* Given X a value that is finite and nonzero, or a NaN, and only
  67    negative if Y is not an integer, and Y a finite nonzero value with
  68    0x1p-79 <= |Y| <= 0x1p78, compute X to the power Y.  */
  69
  70 long double
  71 __powl_helper (long double x, long double y)
  72 {
  73   if (isnan (x) || x < 0)
  74     return __ieee754_expl (y * __ieee754_logl (x));
  75
  76   /* We need to compute Y * log2 (X) to at least 64 bits after the
  77      point for normal results (that is, to at least 78 bits
  78      precision).  */
  79   int x_int_exponent;
  80   long double x_frac;
  81   x_frac = __frexpl (x, &x_int_exponent);
  82   if (x_frac <= 0x0.aaaaaaaaaaaaaaaap0L) /* 2.0L / 3.0L, rounded down */
  83     {
  84       x_frac *= 2.0;
  85       x_int_exponent--;
  86     }
  87
  88   long double log_x_frac_hi, log_x_frac_lo;
  89   /* Determine an initial approximation to log (X_FRAC) using
  90      POWL_LOG_TABLE, and multiply by a value K/16 to reduce to an
  91      interval (24/25, 26/25).  */
  92   int k = (int) ((16.0L / x_frac) + 0.5L);
  93   log_x_frac_hi = powl_log_table[2 * k - 24];
  94   log_x_frac_lo = powl_log_table[2 * k - 23];
  95   long double x_frac_low;
  96   if (k == 16)
  97     x_frac_low = 0.0L;
  98   else
  99     {
 100       /* Mask off low 5 bits of X_FRAC so the multiplication by K/16
 101          is exact.  These bits are small enough that they can be
 102          corrected for by adding log2 (e) * X_FRAC_LOW to the final
 103          result.  */
 104       int32_t se;
 105       u_int32_t i0, i1;
 106       GET_LDOUBLE_WORDS (se, i0, i1, x_frac);
 107       x_frac_low = x_frac;
 108       i1 &= 0xffffffe0;
 109       SET_LDOUBLE_WORDS (x_frac, se, i0, i1);
 110       x_frac_low -= x_frac;
 111       x_frac_low /= x_frac;
 112       x_frac *= k / 16.0L;
 113     }
 114
 115   /* Now compute log (X_FRAC) for X_FRAC in (24/25, 26/25).  Separate
 116      W = X_FRAC - 1 into high 16 bits and remaining bits, so that
 117      multiplications for low-order power series terms are exact.  The
 118      remaining bits are small enough that adding a 64-bit value of
 119      log2 (1 + W_LO / (1 + W_HI)) will be a sufficient correction for
 120      them.  */
 121   long double w = x_frac - 1;
 122   long double w_hi, w_lo;
 123   int32_t se;
 124   u_int32_t i0, i1;
 125   GET_LDOUBLE_WORDS (se, i0, i1, w);
 126   i0 &= 0xffff0000;
 127   i1 = 0;
 128   SET_LDOUBLE_WORDS (w_hi, se, i0, i1);
 129   w_lo = w - w_hi;
 130   long double wp = w_hi;
 131   acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, wp);
 132   wp *= -w_hi;
 133   acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
 134              wp / 2.0L);
 135   wp *= -w_hi;
 136   acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
 137              wp * 0x0.5555p0L); /* -W_HI**3 / 3, high part.  */
 138   acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
 139              wp * 0x0.5555555555555555p-16L); /* -W_HI**3 / 3, low part.  */
 140   wp *= -w_hi;
 141   acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
 142              wp / 4.0L);
 143   /* Subsequent terms are small enough that they only need be computed
 144      to 64 bits.  */
 145   for (int i = 5; i <= 17; i++)
 146     {
 147       wp *= -w_hi;
 148       acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
 149                  wp / i);
 150     }
 151
 152   /* Convert LOG_X_FRAC_HI + LOG_X_FRAC_LO to a base-2 logarithm.  */
 153   long double log2_x_frac_hi, log2_x_frac_lo;
 154   long double log_x_frac_hi32, log_x_frac_lo64;
 155   GET_LDOUBLE_WORDS (se, i0, i1, log_x_frac_hi);
 156   i1 = 0;
 157   SET_LDOUBLE_WORDS (log_x_frac_hi32, se, i0, i1);
 158   log_x_frac_lo64 = (log_x_frac_hi - log_x_frac_hi32) + log_x_frac_lo;
 159   long double log2_x_frac_hi1 = log_x_frac_hi32 * log2e_hi;
 160   long double log2_x_frac_lo1
 161     = log_x_frac_lo64 * log2e_hi + log_x_frac_hi * log2e_lo;
 162   log2_x_frac_hi = log2_x_frac_hi1 + log2_x_frac_lo1;
 163   log2_x_frac_lo = (log2_x_frac_hi1 - log2_x_frac_hi) + log2_x_frac_lo1;
 164
 165   /* Correct for the masking off of W_LO.  */
 166   long double log2_1p_w_lo;
 167   asm ("fyl2xp1"
 168        : "=t" (log2_1p_w_lo)
 169        : "0" (w_lo / (1.0L + w_hi)), "u" (1.0L)
 170        : "st(1)");
 171   acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
 172              log2_1p_w_lo);
 173
 174   /* Correct for the masking off of X_FRAC_LOW.  */
 175   acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
 176              x_frac_low * M_LOG2El);
 177
 178   /* Add the integer and fractional parts of the base-2 logarithm.  */
 179   long double log2_x_hi, log2_x_lo;
 180   log2_x_hi = x_int_exponent + log2_x_frac_hi;
 181   log2_x_lo = ((x_int_exponent - log2_x_hi) + log2_x_frac_hi) + log2_x_frac_lo;
 182
 183   /* Compute the base-2 logarithm of the result.  */
 184   long double log2_res_hi, log2_res_lo;
 185   long double log2_x_hi32, log2_x_lo64;
 186   GET_LDOUBLE_WORDS (se, i0, i1, log2_x_hi);
 187   i1 = 0;
 188   SET_LDOUBLE_WORDS (log2_x_hi32, se, i0, i1);
 189   log2_x_lo64 = (log2_x_hi - log2_x_hi32) + log2_x_lo;
 190   long double y_hi32, y_lo32;
 191   GET_LDOUBLE_WORDS (se, i0, i1, y);
 192   i1 = 0;
 193   SET_LDOUBLE_WORDS (y_hi32, se, i0, i1);
 194   y_lo32 = y - y_hi32;
 195   log2_res_hi = log2_x_hi32 * y_hi32;
 196   log2_res_lo = log2_x_hi32 * y_lo32 + log2_x_lo64 * y;
 197
 198   /* Split the base-2 logarithm of the result into integer and
 199      fractional parts.  */
 200   long double log2_res_int = __roundl (log2_res_hi);
 201   long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo;
 202
 203   /* Compute the final result.  */
 204   long double res;
 205   asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac));
 206   res += 1.0L;
 207   asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int));
 208   return res;
 209 }
 210
 211 libm_hidden_def (__powl_helper)