sysdeps/powerpc/fpu/k_rem_pio2f.c

   1 /* k_rem_pio2f.c -- float version of e_rem_pio2.c
   2    Copyright (C) 2011-2018 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Library General Public License as
   8    published by the Free Software Foundation; either version 2 of the
   9    License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Library General Public License for more details.
  15
  16    You should have received a copy of the GNU Library General Public
  17    License along with the GNU C Library; see the file COPYING.LIB.  If
  18    not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include <math.h>
  21
  22 #include <math_private.h>
  23 #include "s_float_bitwise.h"
  24
  25
  26 static const float two_over_pi[] = {
  27   1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02,
  28   7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01,
  29   2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02,
  30   2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02,
  31   2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02,
  32   6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01,
  33   2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02,
  34   2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01,
  35   1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01,
  36   6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02,
  37   6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02,
  38   9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01,
  39   2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01,
  40   1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01,
  41   2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02,
  42   4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02,
  43   2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01,
  44   1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01,
  45   5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01,
  46   1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02,
  47   1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02,
  48   1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01,
  49   3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02,
  50   2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01,
  51   2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02,
  52   9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01,
  53   1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02,
  54   3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02,
  55   7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02,
  56   1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01,
  57   1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02,
  58   2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02,
  59   6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02,
  60   1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02,
  61   1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02,
  62   3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00,
  63   8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01,
  64   2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02,
  65   2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01,
  66   1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01,
  67   1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01,
  68   9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00,
  69   1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01,
  70   2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01,
  71   1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02,
  72   7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01,
  73   6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01,
  74   2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02,
  75   9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02,
  76   1.40000000e+02, 1.07000000e+02
  77 };
  78
  79
  80 static const float PIo2[] = {
  81   1.5703125000e+00,             /* 0x3fc90000 */
  82   4.5776367188e-04,             /* 0x39f00000 */
  83   2.5987625122e-05,             /* 0x37da0000 */
  84   7.5437128544e-08,             /* 0x33a20000 */
  85   6.0026650317e-11,             /* 0x2e840000 */
  86   7.3896444519e-13,             /* 0x2b500000 */
  87   5.3845816694e-15,             /* 0x27c20000 */
  88   5.6378512969e-18,             /* 0x22d00000 */
  89   8.3009228831e-20,             /* 0x1fc40000 */
  90   3.2756352257e-22,             /* 0x1bc60000 */
  91   6.3331015649e-25,             /* 0x17440000 */
  92 };
  93
  94
  95 static const float zero  = 0.0000000000e+00;
  96 static const float one   = 1.0000000000;
  97 static const float twon8 = 3.9062500000e-03;
  98 static const float two8  = 2.5600000000e+02;
  99
 100
 101 int32_t
 102 __fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx)
 103 {
 104   int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp;
 105   float z, fw, f[20], fq[20], q[20];
 106
 107   /* initialize jk */
 108   jp = jk = 9;
 109
 110   /* determine jx,jv,q0, note that 3>q0 */
 111   jx = nx - 1;
 112   exp = __float_get_exp (e0) - 127;
 113   jv = (exp - 3) / 8;
 114   if (jv < 0)
 115     jv = 0;
 116   q0 = exp - 8 * (jv + 1);
 117
 118   /* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */
 119   j = jv - jx;
 120   m = jx + jk;
 121   for (i = 0; i <= m; i++, j++)
 122     f[i] = (j < 0) ? zero : two_over_pi[j];
 123
 124   /* compute q[0],q[1],...q[jk] */
 125   for (i = 0; i <= jk; i++)
 126     {
 127       for (j = 0, fw = 0.0; j <= jx; j++)
 128         fw += x[j] * f[jx + i - j];
 129       q[i] = fw;
 130     }
 131
 132   jz = jk;
 133 recompute:
 134   /* distill q[] into iq[] reversingly */
 135   for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
 136     {
 137       fw = __truncf (twon8 * z);
 138       iq[i] = (int32_t) (z - two8 * fw);
 139       z = q[j - 1] + fw;
 140     }
 141
 142   /* compute n */
 143   z = __scalbnf (z, q0);        /* actual value of z */
 144   z -= 8.0 * __floorf (z * 0.125);      /* trim off integer >= 8 */
 145   n = (int32_t) z;
 146   z -= __truncf (z);
 147   ih = 0;
 148   if (q0 > 0)
 149     {                           /* need iq[jz-1] to determine n */
 150       i = (iq[jz - 1] >> (8 - q0));
 151       n += i;
 152       iq[jz - 1] -= i << (8 - q0);
 153       ih = iq[jz - 1] >> (7 - q0);
 154     }
 155   else if (q0 == 0)
 156     ih = iq[jz - 1] >> 7;
 157   else if (z >= 0.5)
 158     ih = 2;
 159
 160   if (ih > 0)
 161     {                           /* q > 0.5 */
 162       n += 1;
 163       carry = 0;
 164       for (i = 0; i < jz; i++)
 165         {                       /* compute 1-q */
 166           j = iq[i];
 167           if (carry == 0)
 168             {
 169               if (j != 0)
 170                 {
 171                   carry = 1;
 172                   iq[i] = 0x100 - j;
 173                 }
 174             }
 175           else
 176             iq[i] = 0xff - j;
 177         }
 178       if (q0 > 0)
 179         {                       /* rare case: chance is 1 in 12 */
 180           switch (q0)
 181             {
 182             case 1:
 183               iq[jz - 1] &= 0x7f;
 184               break;
 185             case 2:
 186               iq[jz - 1] &= 0x3f;
 187               break;
 188             }
 189         }
 190       if (ih == 2)
 191         {
 192           z = one - z;
 193           if (carry != 0)
 194             z -= __scalbnf (one, q0);
 195         }
 196     }
 197
 198   /* check if recomputation is needed */
 199   if (z == zero)
 200     {
 201       j = 0;
 202       for (i = jz - 1; i >= jk; i--)
 203         j |= iq[i];
 204       if (j == 0)
 205         {                       /* need recomputation */
 206           for (k = 1; iq[jk - k] == 0; k++);    /* k = no. of terms needed */
 207
 208           for (i = jz + 1; i <= jz + k; i++)
 209             {                   /* add q[jz+1] to q[jz+k] */
 210               f[jx + i] = two_over_pi[jv + i];
 211               for (j = 0, fw = 0.0; j <= jx; j++)
 212                 fw += x[j] * f[jx + i - j];
 213               q[i] = fw;
 214             }
 215           jz += k;
 216           goto recompute;
 217         }
 218     }
 219
 220   /* chop off zero terms */
 221   if (z == 0.0)
 222     {
 223       jz -= 1;
 224       q0 -= 8;
 225       while (iq[jz] == 0)
 226         {
 227           jz--;
 228           q0 -= 8;
 229         }
 230     }
 231   else
 232     {                           /* break z into 8-bit if necessary */
 233       z = __scalbnf (z, -q0);
 234       if (z >= two8)
 235         {
 236           fw = __truncf (twon8 * z);
 237           iq[jz] = (int32_t) (z - two8 * fw);
 238           jz += 1;
 239           q0 += 8;
 240           iq[jz] = (int32_t) fw;
 241         }
 242       else
 243         iq[jz] = (int32_t) z;
 244     }
 245
 246   /* convert integer "bit" chunk to floating-point value */
 247   fw = __scalbnf (one, q0);
 248   for (i = jz; i >= 0; i--)
 249     {
 250       q[i] = fw * (float) iq[i];
 251       fw *= twon8;
 252     }
 253
 254   /* compute PIo2[0,...,jp]*q[jz,...,0] */
 255   for (i = jz; i >= 0; i--)
 256     {
 257       for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 258         fw += PIo2[k] * q[i + k];
 259       fq[jz - i] = fw;
 260     }
 261
 262   /* compress fq[] into y[] */
 263   fw = 0.0;
 264   for (i = jz; i >= 0; i--)
 265     fw += fq[i];
 266   y[0] = (ih == 0) ? fw : -fw;
 267   fw = fq[0] - fw;
 268   for (i = 1; i <= jz; i++)
 269     fw += fq[i];
 270   y[1] = (ih == 0) ? fw : -fw;
 271
 272   return n & 7;
 273 }