1 /* Used by sinf, cosf and sincosf functions.
2 Copyright (C) 2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
21 #include "math_config.h"
24 static const double pi63
= 0x1.921FB54442D18p
-62;
26 static const double pio4
= 0x1.921FB54442D18p
-1;
28 /* The constants and polynomials for sine and cosine. */
31 double sign
[4]; /* Sign of sine in quadrants 0..3. */
32 double hpi_inv
; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */
33 double hpi
; /* PI / 2. */
34 double c0
, c1
, c2
, c3
, c4
; /* Cosine polynomial. */
35 double s1
, s2
, s3
; /* Sine polynomial. */
38 /* Polynomial data (the cosine polynomial is negated in the 2nd entry). */
39 extern const sincos_t __sincosf_table
[2] attribute_hidden
;
41 /* Table with 4/PI to 192 bit precision. */
42 extern const uint32_t __inv_pio4
[] attribute_hidden
;
44 /* Top 12 bits of the float representation with the sign bit cleared. */
45 static inline uint32_t
48 return (asuint (x
) >> 20) & 0x7ff;
51 /* Compute the sine and cosine of inputs X and X2 (X squared), using the
52 polynomial P and store the results in SINP and COSP. N is the quadrant,
53 if odd the cosine and sine polynomials are swapped. */
55 sincosf_poly (double x
, double x2
, const sincos_t
*p
, int n
, float *sinp
,
58 double x3
, x4
, x5
, x6
, s
, c
, c1
, c2
, s1
;
62 c2
= p
->c3
+ x2
* p
->c4
;
63 s1
= p
->s2
+ x2
* p
->s3
;
65 /* Swap sin/cos result based on quadrant. */
66 float *tmp
= (n
& 1 ? cosp
: sinp
);
67 cosp
= (n
& 1 ? sinp
: cosp
);
70 c1
= p
->c0
+ x2
* p
->c1
;
81 /* Return the sine of inputs X and X2 (X squared) using the polynomial P.
82 N is the quadrant, and if odd the cosine polynomial is used. */
84 sinf_poly (double x
, double x2
, const sincos_t
*p
, int n
)
86 double x3
, x4
, x6
, x7
, s
, c
, c1
, c2
, s1
;
91 s1
= p
->s2
+ x2
* p
->s3
;
101 c2
= p
->c3
+ x2
* p
->c4
;
102 c1
= p
->c0
+ x2
* p
->c1
;
111 /* Fast range reduction using single multiply-subtract. Return the modulo of
112 X as a value between -PI/4 and PI/4 and store the quadrant in NP.
113 The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
114 is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
115 the result is accurate for |X| <= 120.0. */
117 reduce_fast (double x
, const sincos_t
*p
, int *np
)
121 /* Use fast round and lround instructions when available. */
123 *np
= converttoint (r
);
124 return x
- roundtoint (r
) * p
->hpi
;
126 /* Use scaled float to int conversion with explicit rounding.
127 hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
128 This avoids inaccuracies introduced by truncating negative values. */
130 int n
= ((int32_t)r
+ 0x800000) >> 24;
132 return x
- n
* p
->hpi
;
136 /* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
137 XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
138 Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
139 Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
140 multiply computes the exact 2.62-bit fixed-point modulo. Since the result
141 can have at most 29 leading zeros after the binary point, the double
142 precision result is accurate to 33 bits. */
144 reduce_large (uint32_t xi
, int *np
)
146 const uint32_t *arr
= &__inv_pio4
[(xi
>> 26) & 15];
147 int shift
= (xi
>> 23) & 7;
148 uint64_t n
, res0
, res1
, res2
;
150 xi
= (xi
& 0xffffff) | 0x800000;
154 res1
= (uint64_t)xi
* arr
[4];
155 res2
= (uint64_t)xi
* arr
[8];
156 res0
= (res2
>> 32) | (res0
<< 32);
159 n
= (res0
+ (1ULL << 61)) >> 62;
161 double x
= (int64_t)res0
;