/* { dg-options "-O3 --save-temps" } */
9 test_frecps_float32_t (void)
12 float32_t value
= 0.2;
13 float32_t reciprocal
= 5.0;
14 float32_t step
= vrecpes_f32 (value
);
15 /* 3 steps should give us within ~0.001 accuracy. */
16 for (i
= 0; i
< 3; i
++)
17 step
= step
* vrecpss_f32 (step
, value
);
19 return fabs (step
- reciprocal
) < 0.001;
/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
26 test_frecps_float32x2_t (void)
31 const float32_t value_pool
[] = {0.2, 0.4};
32 const float32_t reciprocal_pool
[] = {5.0, 2.5};
33 float32x2_t value
= vld1_f32 (value_pool
);
34 float32x2_t reciprocal
= vld1_f32 (reciprocal_pool
);
36 float32x2_t step
= vrecpe_f32 (value
);
37 /* 3 steps should give us within ~0.001 accuracy. */
38 for (i
= 0; i
< 3; i
++)
39 step
= step
* vrecps_f32 (step
, value
);
41 ret
&= fabs (vget_lane_f32 (step
, 0)
42 - vget_lane_f32 (reciprocal
, 0)) < 0.001;
43 ret
&= fabs (vget_lane_f32 (step
, 1)
44 - vget_lane_f32 (reciprocal
, 1)) < 0.001;
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
53 test_frecps_float32x4_t (void)
58 const float32_t value_pool
[] = {0.2, 0.4, 0.5, 0.8};
59 const float32_t reciprocal_pool
[] = {5.0, 2.5, 2.0, 1.25};
60 float32x4_t value
= vld1q_f32 (value_pool
);
61 float32x4_t reciprocal
= vld1q_f32 (reciprocal_pool
);
63 float32x4_t step
= vrecpeq_f32 (value
);
64 /* 3 steps should give us within ~0.001 accuracy. */
65 for (i
= 0; i
< 3; i
++)
66 step
= step
* vrecpsq_f32 (step
, value
);
68 ret
&= fabs (vgetq_lane_f32 (step
, 0)
69 - vgetq_lane_f32 (reciprocal
, 0)) < 0.001;
70 ret
&= fabs (vgetq_lane_f32 (step
, 1)
71 - vgetq_lane_f32 (reciprocal
, 1)) < 0.001;
72 ret
&= fabs (vgetq_lane_f32 (step
, 2)
73 - vgetq_lane_f32 (reciprocal
, 2)) < 0.001;
74 ret
&= fabs (vgetq_lane_f32 (step
, 3)
75 - vgetq_lane_f32 (reciprocal
, 3)) < 0.001;
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
84 test_frecps_float64_t (void)
87 float64_t value
= 0.2;
88 float64_t reciprocal
= 5.0;
89 float64_t step
= vrecped_f64 (value
);
90 /* 3 steps should give us within ~0.001 accuracy. */
91 for (i
= 0; i
< 3; i
++)
92 step
= step
* vrecpsd_f64 (step
, value
);
94 return fabs (step
- reciprocal
) < 0.001;
/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
101 test_frecps_float64x2_t (void)
106 const float64_t value_pool
[] = {0.2, 0.4};
107 const float64_t reciprocal_pool
[] = {5.0, 2.5};
108 float64x2_t value
= vld1q_f64 (value_pool
);
109 float64x2_t reciprocal
= vld1q_f64 (reciprocal_pool
);
111 float64x2_t step
= vrecpeq_f64 (value
);
112 /* 3 steps should give us within ~0.001 accuracy. */
113 for (i
= 0; i
< 3; i
++)
114 step
= step
* vrecpsq_f64 (step
, value
);
116 ret
&= fabs (vgetq_lane_f64 (step
, 0)
117 - vgetq_lane_f64 (reciprocal
, 0)) < 0.001;
118 ret
&= fabs (vgetq_lane_f64 (step
, 1)
119 - vgetq_lane_f64 (reciprocal
, 1)) < 0.001;
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
/* Run every reciprocal-step check in turn and report the first failure
   through the process exit status.

   NOTE(review): the statement executed when a check fails was not
   visible in the source this was rebuilt from; a nonzero exit status is
   used here so a failing check is reported as an execution failure.
   Confirm against the upstream test (it may call abort () instead).  */
int
main (int argc, char **argv)
{
  if (!test_frecps_float32_t ())
    return 1;

  if (!test_frecps_float32x2_t ())
    return 1;

  if (!test_frecps_float32x4_t ())
    return 1;

  if (!test_frecps_float64_t ())
    return 1;

  if (!test_frecps_float64x2_t ())
    return 1;

  return 0;
}