gcc/testsuite/gcc.target/aarch64/vrecps.c

   1 /* { dg-do run } */
   2 /* { dg-options "-O3 --save-temps" } */
   3
   4 #include <arm_neon.h>
   5 #include <math.h>
   6 #include <stdlib.h>
   7
   8 int
   9 test_frecps_float32_t (void)
  10 {
  11   /* Return nonzero when the refined scalar estimate is close to 5.0.  */
  12   float32_t input = 0.2;
  13   float32_t expected = 5.0;
  14   float32_t estimate = vrecpes_f32 (input);
  15   int remaining = 3;
  16   /* Three refinement passes are expected to land within ~0.001.  */
  17   while (remaining-- > 0)
  18     estimate *= vrecpss_f32 (estimate, input);
  19   return fabs (estimate - expected) < 0.001;
  20 }
  21
  22 /* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
  23 /* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
  24
  25 int
  26 test_frecps_float32x2_t (void)
  27 {
  28   /* Refine a 2-lane single-precision estimate and check both lanes.  */
  29   const float32_t inputs[] = {0.2, 0.4};
  30   const float32_t answers[] = {5.0, 2.5};
  31   float32x2_t in = vld1_f32 (inputs);
  32   float32x2_t want = vld1_f32 (answers);
  33   float32x2_t est = vrecpe_f32 (in);
  34   float32_t err0, err1;
  35   int remaining = 3;
  36
  37   /* Three refinement passes are expected to land within ~0.001.  */
  38   while (remaining-- > 0)
  39     est *= vrecps_f32 (est, in);
  40
  41   /* Signed distance of each lane from its expected value.  */
  42   err0 = vget_lane_f32 (est, 0) - vget_lane_f32 (want, 0);
  43   err1 = vget_lane_f32 (est, 1) - vget_lane_f32 (want, 1);
  44
  45   /* Nonzero only when both lanes are inside the tolerance.  */
  46   return fabs (err0) < 0.001 && fabs (err1) < 0.001;
  47 }
  48
  49 /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
  50 /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
  51
  52 int
  53 test_frecps_float32x4_t (void)
  54 {
  55   /* Refine a 4-lane single-precision estimate and check each lane
  56      against its expected value.  */
  57   const float32_t inputs[] = {0.2, 0.4, 0.5, 0.8};
  58   const float32_t answers[] = {5.0, 2.5, 2.0, 1.25};
  59   float32x4_t in = vld1q_f32 (inputs);
  60   float32x4_t want = vld1q_f32 (answers);
  61   float32x4_t est = vrecpeq_f32 (in);
  62   float32_t err0, err1, err2, err3;
  63   int remaining = 3;
  64
  65   /* Three refinement passes are expected to land within ~0.001.  */
  66   while (remaining-- > 0)
  67     est *= vrecpsq_f32 (est, in);
  68
  69   /* Signed distance of each lane from its expected value.  */
  70   err0 = vgetq_lane_f32 (est, 0) - vgetq_lane_f32 (want, 0);
  71   err1 = vgetq_lane_f32 (est, 1) - vgetq_lane_f32 (want, 1);
  72   err2 = vgetq_lane_f32 (est, 2) - vgetq_lane_f32 (want, 2);
  73   err3 = vgetq_lane_f32 (est, 3) - vgetq_lane_f32 (want, 3);
  74
  75   /* Nonzero only when all four lanes are inside the tolerance.  */
  76   return (fabs (err0) < 0.001 && fabs (err1) < 0.001
  77           && fabs (err2) < 0.001 && fabs (err3) < 0.001);
  78 }
  79
  80 /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
  81 /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
  82
  83 int
  84 test_frecps_float64_t (void)
  85 {
  86   /* Return nonzero when the refined scalar estimate is close to 5.0.  */
  87   float64_t input = 0.2;
  88   float64_t expected = 5.0;
  89   float64_t estimate = vrecped_f64 (input);
  90   int remaining = 3;
  91   /* Three refinement passes are expected to land within ~0.001.  */
  92   while (remaining-- > 0)
  93     estimate *= vrecpsd_f64 (estimate, input);
  94   return fabs (estimate - expected) < 0.001;
  95 }
  96
  97 /* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
  98 /* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
  99
 100 int
 101 test_frecps_float64x2_t (void)
 102 {
 103   /* Refine a 2-lane double-precision estimate and check both lanes.  */
 104   const float64_t inputs[] = {0.2, 0.4};
 105   const float64_t answers[] = {5.0, 2.5};
 106   float64x2_t in = vld1q_f64 (inputs);
 107   float64x2_t want = vld1q_f64 (answers);
 108   float64x2_t est = vrecpeq_f64 (in);
 109   float64_t err0, err1;
 110   int remaining = 3;
 111
 112   /* Three refinement passes are expected to land within ~0.001.  */
 113   while (remaining-- > 0)
 114     est *= vrecpsq_f64 (est, in);
 115
 116   /* Signed distance of each lane from its expected value.  */
 117   err0 = vgetq_lane_f64 (est, 0) - vgetq_lane_f64 (want, 0);
 118   err1 = vgetq_lane_f64 (est, 1) - vgetq_lane_f64 (want, 1);
 119
 120   /* Nonzero only when both lanes are inside the tolerance.  */
 121   return fabs (err0) < 0.001 && fabs (err1) < 0.001;
 122 }
 123
 124 /* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
 125 /* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
 126
 127 int
 128 main (int argc, char **argv)
 129 {
 130   int all_passed;
 131
 132   /* Run the variants in order; the && chain stops calling further
 133      variants once one of them returns zero.  */
 134   all_passed = (test_frecps_float32_t ()
 135                 && test_frecps_float32x2_t ()
 136                 && test_frecps_float32x4_t ()
 137                 && test_frecps_float64_t ()
 138                 && test_frecps_float64x2_t ());
 139   if (!all_passed)
 140     abort ();
 141   return 0;
 142 }
 143