sysdeps/ieee754/flt-32/s_sincosf.h

   1 /* Used by sinf, cosf and sincosf functions.
   2    Copyright (C) 2018 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <stdint.h>
  20 #include <math.h>
  21 #include "math_config.h"
  22
  23 /* 2PI * 2^-64.  */
  24 static const double pi63 = 0x1.921FB54442D18p-62;
  25 /* PI / 4.  */
  26 static const double pio4 = 0x1.921FB54442D18p-1;
  27
  28 /* The constants and polynomials for sine and cosine.  */
  29 typedef struct
  30 {
  31   double sign[4];               /* Sign of sine in quadrants 0..3.  */
  32   double hpi_inv;               /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS).  */
  33   double hpi;                   /* PI / 2.  */
  34   double c0, c1, c2, c3, c4;    /* Cosine polynomial.  */
  35   double s1, s2, s3;            /* Sine polynomial.  */
  36 } sincos_t;
  37
  38 /* Polynomial data (the cosine polynomial is negated in the 2nd entry).  */
  39 extern const sincos_t __sincosf_table[2] attribute_hidden;
  40
  41 /* Table with 4/PI to 192 bit precision.  */
  42 extern const uint32_t __inv_pio4[] attribute_hidden;
  43
  44 /* Top 12 bits of the float representation with the sign bit cleared.  */
  45 static inline uint32_t
  46 abstop12 (float x)
  47 {
  48   return (asuint (x) >> 20) & 0x7ff;
  49 }
  50
  51 /* Compute the sine and cosine of inputs X and X2 (X squared), using the
  52    polynomial P and store the results in SINP and COSP.  N is the quadrant,
  53    if odd the cosine and sine polynomials are swapped.  */
  54 static inline void
  55 sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
  56               float *cosp)
  57 {
  58   double x3, x4, x5, x6, s, c, c1, c2, s1;
  59
  60   x4 = x2 * x2;
  61   x3 = x2 * x;
  62   c2 = p->c3 + x2 * p->c4;
  63   s1 = p->s2 + x2 * p->s3;
  64
  65   /* Swap sin/cos result based on quadrant.  */
  66   float *tmp = (n & 1 ? cosp : sinp);
  67   cosp = (n & 1 ? sinp : cosp);
  68   sinp = tmp;
  69
  70   c1 = p->c0 + x2 * p->c1;
  71   x5 = x3 * x2;
  72   x6 = x4 * x2;
  73
  74   s = x + x3 * p->s1;
  75   c = c1 + x4 * p->c2;
  76
  77   *sinp = s + x5 * s1;
  78   *cosp = c + x6 * c2;
  79 }
  80
  81 /* Return the sine of inputs X and X2 (X squared) using the polynomial P.
  82    N is the quadrant, and if odd the cosine polynomial is used.  */
  83 static inline float
  84 sinf_poly (double x, double x2, const sincos_t *p, int n)
  85 {
  86   double x3, x4, x6, x7, s, c, c1, c2, s1;
  87
  88   if ((n & 1) == 0)
  89     {
  90       x3 = x * x2;
  91       s1 = p->s2 + x2 * p->s3;
  92
  93       x7 = x3 * x2;
  94       s = x + x3 * p->s1;
  95
  96       return s + x7 * s1;
  97     }
  98   else
  99     {
 100       x4 = x2 * x2;
 101       c2 = p->c3 + x2 * p->c4;
 102       c1 = p->c0 + x2 * p->c1;
 103
 104       x6 = x4 * x2;
 105       c = c1 + x4 * p->c2;
 106
 107       return c + x6 * c2;
 108     }
 109 }
 110
 111 /* Fast range reduction using single multiply-subtract.  Return the modulo of
 112    X as a value between -PI/4 and PI/4 and store the quadrant in NP.
 113    The values for PI/2 and 2/PI are accessed via P.  Since PI/2 as a double
 114    is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
 115    the result is accurate for |X| <= 120.0.  */
 116 static inline double
 117 reduce_fast (double x, const sincos_t *p, int *np)
 118 {
 119   double r;
 120 #if TOINT_INTRINSICS
 121   /* Use fast round and lround instructions when available.  */
 122   r = x * p->hpi_inv;
 123   *np = converttoint (r);
 124   return x - roundtoint (r) * p->hpi;
 125 #else
 126   /* Use scaled float to int conversion with explicit rounding.
 127      hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
 128      This avoids inaccuracies introduced by truncating negative values.  */
 129   r = x * p->hpi_inv;
 130   int n = ((int32_t)r + 0x800000) >> 24;
 131   *np = n;
 132   return x - n * p->hpi;
 133 #endif
 134 }
 135
 136 /* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
 137    XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
 138    Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
 139    Reduction uses a table of 4/PI with 192 bits of precision.  A 32x96->128 bit
 140    multiply computes the exact 2.62-bit fixed-point modulo.  Since the result
 141    can have at most 29 leading zeros after the binary point, the double
 142    precision result is accurate to 33 bits.  */
 143 static inline double
 144 reduce_large (uint32_t xi, int *np)
 145 {
 146   const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
 147   int shift = (xi >> 23) & 7;
 148   uint64_t n, res0, res1, res2;
 149
 150   xi = (xi & 0xffffff) | 0x800000;
 151   xi <<= shift;
 152
 153   res0 = xi * arr[0];
 154   res1 = (uint64_t)xi * arr[4];
 155   res2 = (uint64_t)xi * arr[8];
 156   res0 = (res2 >> 32) | (res0 << 32);
 157   res0 += res1;
 158
 159   n = (res0 + (1ULL << 61)) >> 62;
 160   res0 -= n << 62;
 161   double x = (int64_t)res0;
 162   *np = n;
 163   return x * pi63;
 164 }