sysdeps/ieee754/flt-32/s_cosf.c

   1 /* Compute cosine of argument.
   2    Copyright (C) 2017 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <errno.h>
  20 #include <math.h>
  21 #include <math_private.h>
  22 #include <libm-alias-float.h>
  23
  24 #ifndef COSF
  25 # define COSF_FUNC __cosf
  26 #else
  27 # define COSF_FUNC COSF
  28 #endif
  29
  30 /* Chebyshev constants for cos, range -PI/4 - PI/4.  */
  31 static const double C0 = -0x1.ffffffffe98aep-2;
  32 static const double C1 =  0x1.55555545c50c7p-5;
  33 static const double C2 = -0x1.6c16b348b6874p-10;
  34 static const double C3 =  0x1.a00eb9ac43ccp-16;
  35 static const double C4 = -0x1.23c97dd8844d7p-22;
  36
  37 /* Chebyshev constants for sin, range -PI/4 - PI/4.  */
  38 static const double S0 = -0x1.5555555551cd9p-3;
  39 static const double S1 =  0x1.1111110c2688bp-7;
  40 static const double S2 = -0x1.a019f8b4bd1f9p-13;
  41 static const double S3 =  0x1.71d7264e6b5b4p-19;
  42 static const double S4 = -0x1.a947e1674b58ap-26;
  43
  44 /* Chebyshev constants for cos, range 2^-27 - 2^-5.  */
  45 static const double CC0 = -0x1.fffffff5cc6fdp-2;
  46 static const double CC1 =  0x1.55514b178dac5p-5;
  47
  48 /* PI/2 with 98 bits of accuracy.  */
  49 static const double PI_2_hi = 0x1.921fb544p+0;
  50 static const double PI_2_lo = 0x1.0b4611a626332p-34;
  51
  52 static const double inv_PI_4 = 0x1.45f306dc9c883p+0; /* 4/PI.  */
  53
  54 #define FLOAT_EXPONENT_SHIFT 23
  55 #define FLOAT_EXPONENT_BIAS 127
  56
  57 static const double pio2_table[] = {
  58   0 * M_PI_2,
  59   1 * M_PI_2,
  60   2 * M_PI_2,
  61   3 * M_PI_2,
  62   4 * M_PI_2,
  63   5 * M_PI_2
  64 };
  65
  66 static const double invpio4_table[] = {
  67   0x0p+0,
  68   0x1.45f306cp+0,
  69   0x1.c9c882ap-28,
  70   0x1.4fe13a8p-58,
  71   0x1.f47d4dp-85,
  72   0x1.bb81b6cp-112,
  73   0x1.4acc9ep-142,
  74   0x1.0e4107cp-169
  75 };
  76
  77 static const double ones[] = { 1.0, -1.0 };
  78
  79 /* Compute the cosine value using Chebyshev polynomials where
  80    THETA is the range reduced absolute value of the input
  81    and it is less than Pi/4,
  82    N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide
  83    whether a sine or cosine approximation is more accurate and
  84    the sign of the result.  */
  85 static inline float
  86 reduced (double theta, unsigned int n)
  87 {
  88   double sign, cx;
  89   const double theta2 = theta * theta;
  90
  91   /* Determine positive or negative primary interval.  */
  92   n += 2;
  93   sign = ones[(n >> 2) & 1];
  94
  95   /* Are we in the primary interval of sin or cos?  */
  96   if ((n & 2) == 0)
  97     {
  98       /* Here cosf() is calculated using sin Chebyshev polynomial:
  99         x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).  */
 100       cx = S3 + theta2 * S4;
 101       cx = S2 + theta2 * cx;
 102       cx = S1 + theta2 * cx;
 103       cx = S0 + theta2 * cx;
 104       cx = theta + theta * theta2 * cx;
 105     }
 106   else
 107     {
 108      /* Here cosf() is calculated using cos Chebyshev polynomial:
 109         1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).  */
 110       cx = C3 + theta2 * C4;
 111       cx = C2 + theta2 * cx;
 112       cx = C1 + theta2 * cx;
 113       cx = C0 + theta2 * cx;
 114       cx = 1. + theta2 * cx;
 115     }
 116   return sign * cx;
 117 }
 118
 119 float
 120 COSF_FUNC (float x)
 121 {
 122   double theta = x;
 123   double abstheta = fabs (theta);
 124   if (isless (abstheta, M_PI_4))
 125     {
 126       double cx;
 127       if (abstheta >= 0x1p-5)
 128         {
 129           const double theta2 = theta * theta;
 130           /* Chebyshev polynomial of the form for cos:
 131            * 1 + x^2 (C0 + x^2 (C1 + x^2 (C2 + x^2 (C3 + x^2 * C4)))).  */
 132           cx = C3 + theta2 * C4;
 133           cx = C2 + theta2 * cx;
 134           cx = C1 + theta2 * cx;
 135           cx = C0 + theta2 * cx;
 136           cx = 1. + theta2 * cx;
 137           return cx;
 138         }
 139       else if (abstheta >= 0x1p-27)
 140         {
 141           /* A simpler Chebyshev approximation is close enough for this range:
 142            * 1 + x^2 (CC0 + x^3 * CC1).  */
 143           const double theta2 = theta * theta;
 144           cx = CC0 + theta * theta2 * CC1;
 145           cx = 1.0 + theta2 * cx;
 146           return cx;
 147         }
 148       else
 149         {
 150           /* For small enough |theta|, this is close enough.  */
 151           return 1.0 - abstheta;
 152         }
 153     }
 154   else /* |theta| >= Pi/4.  */
 155     {
 156       if (isless (abstheta, 9 * M_PI_4))
 157         {
 158           /* There are cases where FE_UPWARD rounding mode can
 159              produce a result of abstheta * inv_PI_4 == 9,
 160              where abstheta < 9pi/4, so the domain for
 161              pio2_table must go to 5 (9 / 2 + 1).  */
 162           unsigned int n = (abstheta * inv_PI_4) + 1;
 163           theta = abstheta - pio2_table[n / 2];
 164           return reduced (theta, n);
 165         }
 166       else if (isless (abstheta, INFINITY))
 167         {
 168           if (abstheta < 0x1p+23)
 169             {
 170               unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;
 171               double x = n / 2;
 172               theta = (abstheta - x * PI_2_hi) - x * PI_2_lo;
 173               /* Argument reduction needed.  */
 174               return reduced (theta, n);
 175             }
 176           else /* |theta| >= 2^23.  */
 177             {
 178               x = fabsf (x);
 179               int exponent;
 180               GET_FLOAT_WORD (exponent, x);
 181               exponent = (exponent >> FLOAT_EXPONENT_SHIFT)
 182                          - FLOAT_EXPONENT_BIAS;
 183               exponent += 3;
 184               exponent /= 28;
 185               double a = invpio4_table[exponent] * x;
 186               double b = invpio4_table[exponent + 1] * x;
 187               double c = invpio4_table[exponent + 2] * x;
 188               double d = invpio4_table[exponent + 3] * x;
 189               uint64_t l = a;
 190               l &= ~0x7;
 191               a -= l;
 192               double e = a + b;
 193               l = e;
 194               e = a - l;
 195               if (l & 1)
 196                 {
 197                   e -= 1.0;
 198                   e += b;
 199                   e += c;
 200                   e += d;
 201                   e *= M_PI_4;
 202                   return reduced (e, l + 1);
 203                 }
 204               else
 205                 {
 206                   e += b;
 207                   e += c;
 208                   e += d;
 209                   if (e <= 1.0)
 210                     {
 211                       e *= M_PI_4;
 212                       return reduced (e, l + 1);
 213                     }
 214                   else
 215                     {
 216                       l++;
 217                       e -= 2.0;
 218                       e *= M_PI_4;
 219                       return reduced (e, l + 1);
 220                     }
 221                 }
 222             }
 223         }
 224       else
 225         {
 226           int32_t ix;
 227           GET_FLOAT_WORD (ix, abstheta);
 228           /* cos(Inf or NaN) is NaN.  */
 229           if (ix == 0x7f800000) /* Inf.  */
 230             __set_errno (EDOM);
 231           return x - x;
 232         }
 233     }
 234 }
 235
 236 #ifndef COSF
 237 libm_alias_float (__cos, cos)
 238 #endif