1 /* Double-precision vector (Advanced SIMD) sin function.
3 Copyright (C) 2023 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
22 static const struct data
25 float64x2_t range_val
, inv_pi
, shift
, pi_1
, pi_2
, pi_3
;
27 /* Worst-case error is 2.8 ulp in [-pi/2, pi/2]. */
28 .poly
= { V2 (-0x1.555555555547bp
-3), V2 (0x1.1111111108a4dp
-7),
29 V2 (-0x1.a01a019936f27p
-13), V2 (0x1.71de37a97d93ep
-19),
30 V2 (-0x1.ae633919987c6p
-26), V2 (0x1.60e277ae07cecp
-33),
31 V2 (-0x1.9e9540300a1p
-41) },
33 .range_val
= V2 (0x1p
23),
34 .inv_pi
= V2 (0x1.45f306dc9c883p
-2),
35 .pi_1
= V2 (0x1.921fb54442d18p
+1),
36 .pi_2
= V2 (0x1.1a62633145c06p
-53),
37 .pi_3
= V2 (0x1.c1cd129024e09p
-106),
38 .shift
= V2 (0x1.8p52
),
42 # define TinyBound v_u64 (0x3000000000000000) /* asuint64 (0x1p-255). */
43 # define Thresh v_u64 (0x1160000000000000) /* RangeVal - TinyBound. */
46 #define C(i) d->poly[i]
48 static float64x2_t VPCS_ATTR NOINLINE
49 special_case (float64x2_t x
, float64x2_t y
, uint64x2_t odd
, uint64x2_t cmp
)
51 y
= vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y
), odd
));
52 return v_call_f64 (sin
, x
, y
, cmp
);
55 float64x2_t VPCS_ATTR
V_NAME_D1 (sin
) (float64x2_t x
)
57 const struct data
*d
= ptr_barrier (&data
);
58 float64x2_t n
, r
, r2
, r3
, r4
, y
, t1
, t2
, t3
;
59 uint64x2_t odd
, cmp
, eqz
;
62 /* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be
63 triggered correctly, set any special lanes to 1 (which is neutral w.r.t.
64 fenv). These lanes will be fixed by special-case handler later. */
65 uint64x2_t ir
= vreinterpretq_u64_f64 (vabsq_f64 (x
));
66 cmp
= vcgeq_u64 (vsubq_u64 (ir
, TinyBound
), Thresh
);
67 r
= vbslq_f64 (cmp
, vreinterpretq_f64_u64 (cmp
), x
);
70 cmp
= vcageq_f64 (d
->range_val
, x
);
71 cmp
= vceqzq_u64 (cmp
); /* cmp = ~cmp. */
75 /* n = rint(|x|/pi). */
76 n
= vfmaq_f64 (d
->shift
, d
->inv_pi
, r
);
77 odd
= vshlq_n_u64 (vreinterpretq_u64_f64 (n
), 63);
78 n
= vsubq_f64 (n
, d
->shift
);
80 /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
81 r
= vfmsq_f64 (r
, d
->pi_1
, n
);
82 r
= vfmsq_f64 (r
, d
->pi_2
, n
);
83 r
= vfmsq_f64 (r
, d
->pi_3
, n
);
85 /* sin(r) poly approx. */
86 r2
= vmulq_f64 (r
, r
);
87 r3
= vmulq_f64 (r2
, r
);
88 r4
= vmulq_f64 (r2
, r2
);
90 t1
= vfmaq_f64 (C (4), C (5), r2
);
91 t2
= vfmaq_f64 (C (2), C (3), r2
);
92 t3
= vfmaq_f64 (C (0), C (1), r2
);
94 y
= vfmaq_f64 (t1
, C (6), r4
);
95 y
= vfmaq_f64 (t2
, y
, r4
);
96 y
= vfmaq_f64 (t3
, y
, r4
);
97 y
= vfmaq_f64 (r
, y
, r3
);
99 /* Sign of 0 is discarded by polynomial, so copy it back here. */
100 if (__glibc_unlikely (v_any_u64 (eqz
)))
101 y
= vbslq_f64 (eqz
, x
, y
);
103 if (__glibc_unlikely (v_any_u64 (cmp
)))
104 return special_case (x
, y
, odd
, cmp
);
105 return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y
), odd
));