Add execution + assembler tests of AArch64 TRN Intrinsics.
[official-gcc.git] / gcc / testsuite / gcc.target / aarch64 / vrecps.c
blobc279a4493a9fe4207ce8f6e06489d351d301de69
1 /* { dg-do run } */
2 /* { dg-options "-O3 --save-temps" } */
4 #include <arm_neon.h>
5 #include <math.h>
6 #include <stdlib.h>
8 int
9 test_frecps_float32_t (void)
11 int i;
12 float32_t value = 0.2;
13 float32_t reciprocal = 5.0;
14 float32_t step = vrecpes_f32 (value);
15 /* 3 steps should give us within ~0.001 accuracy. */
16 for (i = 0; i < 3; i++)
17 step = step * vrecpss_f32 (step, value);
19 return fabs (step - reciprocal) < 0.001;
22 /* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
23 /* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
25 int
26 test_frecps_float32x2_t (void)
28 int i;
29 int ret = 1;
31 const float32_t value_pool[] = {0.2, 0.4};
32 const float32_t reciprocal_pool[] = {5.0, 2.5};
33 float32x2_t value = vld1_f32 (value_pool);
34 float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
36 float32x2_t step = vrecpe_f32 (value);
37 /* 3 steps should give us within ~0.001 accuracy. */
38 for (i = 0; i < 3; i++)
39 step = step * vrecps_f32 (step, value);
41 ret &= fabs (vget_lane_f32 (step, 0)
42 - vget_lane_f32 (reciprocal, 0)) < 0.001;
43 ret &= fabs (vget_lane_f32 (step, 1)
44 - vget_lane_f32 (reciprocal, 1)) < 0.001;
46 return ret;
49 /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
50 /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
52 int
53 test_frecps_float32x4_t (void)
55 int i;
56 int ret = 1;
58 const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
59 const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
60 float32x4_t value = vld1q_f32 (value_pool);
61 float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
63 float32x4_t step = vrecpeq_f32 (value);
64 /* 3 steps should give us within ~0.001 accuracy. */
65 for (i = 0; i < 3; i++)
66 step = step * vrecpsq_f32 (step, value);
68 ret &= fabs (vgetq_lane_f32 (step, 0)
69 - vgetq_lane_f32 (reciprocal, 0)) < 0.001;
70 ret &= fabs (vgetq_lane_f32 (step, 1)
71 - vgetq_lane_f32 (reciprocal, 1)) < 0.001;
72 ret &= fabs (vgetq_lane_f32 (step, 2)
73 - vgetq_lane_f32 (reciprocal, 2)) < 0.001;
74 ret &= fabs (vgetq_lane_f32 (step, 3)
75 - vgetq_lane_f32 (reciprocal, 3)) < 0.001;
77 return ret;
80 /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
81 /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
83 int
84 test_frecps_float64_t (void)
86 int i;
87 float64_t value = 0.2;
88 float64_t reciprocal = 5.0;
89 float64_t step = vrecped_f64 (value);
90 /* 3 steps should give us within ~0.001 accuracy. */
91 for (i = 0; i < 3; i++)
92 step = step * vrecpsd_f64 (step, value);
94 return fabs (step - reciprocal) < 0.001;
97 /* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
98 /* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
101 test_frecps_float64x2_t (void)
103 int i;
104 int ret = 1;
106 const float64_t value_pool[] = {0.2, 0.4};
107 const float64_t reciprocal_pool[] = {5.0, 2.5};
108 float64x2_t value = vld1q_f64 (value_pool);
109 float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
111 float64x2_t step = vrecpeq_f64 (value);
112 /* 3 steps should give us within ~0.001 accuracy. */
113 for (i = 0; i < 3; i++)
114 step = step * vrecpsq_f64 (step, value);
116 ret &= fabs (vgetq_lane_f64 (step, 0)
117 - vgetq_lane_f64 (reciprocal, 0)) < 0.001;
118 ret &= fabs (vgetq_lane_f64 (step, 1)
119 - vgetq_lane_f64 (reciprocal, 1)) < 0.001;
121 return ret;
124 /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
125 /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
/* Run every reciprocal test in turn and abort on the first one that
   reports failure (returns zero).  Returns 0 when all of them pass.
   ARGC and ARGV are unused.  */
int
main (int argc, char **argv)
{
  if (!test_frecps_float32_t ())
    abort ();
  if (!test_frecps_float32x2_t ())
    abort ();
  if (!test_frecps_float32x4_t ())
    abort ();
  if (!test_frecps_float64_t ())
    abort ();
  if (!test_frecps_float64x2_t ())
    abort ();

  return 0;
}
144 /* { dg-final { cleanup-saved-temps } } */