From d05d070973b03dedf93c2007335d64ecf4f462ba Mon Sep 17 00:00:00 2001 From: James Greenhalgh Date: Sat, 20 Jul 2013 07:56:41 +0000 Subject: [PATCH] [AArch64] Rewrite vabs_s<8,16,32,64> AdvSIMD intrinsics to fold to tree gcc/ * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Fold abs in all modes. * config/aarch64/aarch64-simd-builtins.def (abs): Enable for all modes. * config/aarch64/arm_neon.h (vabs_s<8,16,32,64): Rewrite using builtins. (vabs_f64): Add missing intrinsic. gcc/testsuite/ * gcc.target/aarch64/vabs_intrinsic_1.c: New file. From-SVN: r201083 --- gcc/ChangeLog | 10 ++ gcc/config/aarch64/aarch64-builtins.c | 2 +- gcc/config/aarch64/aarch64-simd-builtins.def | 2 +- gcc/config/aarch64/arm_neon.h | 125 ++++++++------------- gcc/testsuite/ChangeLog | 4 + .../gcc.target/aarch64/vabs_intrinsic_1.c | 101 +++++++++++++++++ 6 files changed, 165 insertions(+), 79 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 822639b076f..b457e6cdd5c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2013-07-20 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_fold_builtin): Fold abs in all modes. + * config/aarch64/aarch64-simd-builtins.def + (abs): Enable for all modes. + * config/aarch64/arm_neon.h + (vabs_s<8,16,32,64): Rewrite using builtins. + (vabs_f64): Add missing intrinsic. + 2013-07-19 Ian Bolton * config/aarch64/arm_neon.h (vabs_s64): New function diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index f49f06b1a99..6816b9cfdaa 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1325,7 +1325,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, switch (fcode) { - BUILTIN_VDQF (UNOP, abs, 2) + BUILTIN_VALLDI (UNOP, abs, 2) return fold_build1 (ABS_EXPR, type, args[0]); break; BUILTIN_VALLDI (BINOP, cmge, 0) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index af2dd6efe0f..55dead6e404 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -347,7 +347,7 @@ BUILTIN_VDQF (UNOP, frecpe, 0) BUILTIN_VDQF (BINOP, frecps, 0) - BUILTIN_VDQF (UNOP, abs, 2) + BUILTIN_VALLDI (UNOP, abs, 2) VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) VAR1 (BINOP, float_truncate_hi_, 0, v4sf) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 122fd7daa02..99cf123e29e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -4468,83 +4468,6 @@ vabds_f32 (float32_t a, float32_t b) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vabs_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("abs %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vabs_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("abs %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vabs_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("abs %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vabsq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("abs %0.16b,%1.16b" - : 
"=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vabsq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("abs %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vabsq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("abs %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vabsq_s64 (int64x2_t a) -{ - int64x2_t result; - __asm__ ("abs %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vaddlv_s8 (int8x8_t a) { @@ -17395,6 +17318,30 @@ vabs_f32 (float32x2_t __a) return __builtin_aarch64_absv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vabs_f64 (float64x1_t __a) +{ + return __builtin_fabs (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return __builtin_aarch64_absv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return __builtin_aarch64_absv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return __builtin_aarch64_absv2si (__a); +} + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vabs_s64 (int64x1_t __a) { @@ -17413,6 +17360,30 @@ vabsq_f64 (float64x2_t __a) return __builtin_aarch64_absv2df (__a); } +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_absv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_absv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_absv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabsq_s64 (int64x2_t __a) +{ + return __builtin_aarch64_absv2di (__a); +} + /* vadd */ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3f5a355758c..ae3b789ff42 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-07-20 James Greenhalgh + + * gcc.target/aarch64/vabs_intrinsic_1.c: New file. + 2013-07-20 Joern Rennecke * gcc.dg/pr57154.c: Add dg-require-effective-target scheduling. 
diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c new file mode 100644 index 00000000000..b34738c00a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ +
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define ETYPE(size) int##size##_t
+#define VTYPE(size, lanes) int##size##x##lanes##_t
+
+#define TEST_VABS(q, size, lanes) \
+static void \
+test_vabs##q##_##size (ETYPE (size) * res, \
+                       const ETYPE (size) *in1) \
+{ \
+  VTYPE (size, lanes) a = vld1##q##_s##size (res); \
+  VTYPE (size, lanes) b = vld1##q##_s##size (in1); \
+  a = vabs##q##_s##size (b); \
+  vst1##q##_s##size (res, a); \
+}
+
+#define BUILD_VARS(width, n_lanes, n_half_lanes) \
+TEST_VABS (, width, n_half_lanes) \
+TEST_VABS (q, width, n_lanes) \
+
+BUILD_VARS (64, 2, 1)
+BUILD_VARS (32, 4, 2)
+BUILD_VARS (16, 8, 4)
+BUILD_VARS (8, 16, 8)
+
+#define POOL1 {-10}
+#define POOL2 {2, -10}
+#define POOL4 {0, -10, 2, -3}
+#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70}
+#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \
+                -5, 10, -2, 3, -4, 50, -6, 70}
+
+#define EXPECTED1 {10}
+#define EXPECTED2 {2, 10}
+#define EXPECTED4 {0, 10, 2, 3}
+#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70}
+#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \
+                    5, 10, 2, 3, 4, 50, 6, 70}
+
+#define BUILD_TEST(size, lanes_64, lanes_128) \
+static void \
+test_##size (void) \
+{ \
+  int i; \
+  ETYPE (size) pool1[lanes_64] = POOL##lanes_64; \
+  ETYPE (size) res1[lanes_64] = {0}; \
+  ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \
+  ETYPE (size) pool2[lanes_128] = POOL##lanes_128; \
+  ETYPE (size) res2[lanes_128] = {0}; \
+  ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \
+ \
+  /* Forcefully avoid optimization.  */ \
+  asm volatile ("" : : : "memory"); \
+  test_vabs_##size (res1, pool1); \
+  for (i = 0; i < lanes_64; i++) \
+    if (res1[i] != expected1[i]) \
+      abort (); \
+ \
+  /* Forcefully avoid optimization.  */ \
+  asm volatile ("" : : : "memory"); \
+  test_vabsq_##size (res2, pool2); \
+  for (i = 0; i < lanes_128; i++) \
+    if (res2[i] != expected2[i]) \
+      abort (); \
+}
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+BUILD_TEST (8 , 8, 16)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+BUILD_TEST (16, 4, 8)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+BUILD_TEST (32, 2, 4)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+BUILD_TEST (64, 1, 2)
+
+#undef BUILD_TEST
+
+#define BUILD_TEST(size) test_##size ()
+
+int
+main (int argc, char **argv)
+{
+  BUILD_TEST (8);
+  BUILD_TEST (16);
+  BUILD_TEST (32);
+  BUILD_TEST (64);
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
-- 
2.11.4.GIT
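
As a quick way to exercise the rewritten intrinsics outside the testsuite, the sketch below may help (it is not part of the patch; the file name, function names, and command line are assumptions). Because vabs_s16 and vabsq_s32 now expand through __builtin_aarch64_absv4hi and __builtin_aarch64_absv4si instead of inline asm, and aarch64_fold_builtin lowers those builtins to ABS_EXPR, the compiler sees ordinary abs operations; built for AArch64 with optimization enabled, each function is expected to assemble to a single vector abs instruction of the form the new scan-assembler directives match.

/* abs_sketch.c: illustrative only, assuming an AArch64 GCC with this
   patch applied, e.g. "aarch64-none-elf-gcc -O2 -S abs_sketch.c".  */
#include <arm_neon.h>

/* Expands via __builtin_aarch64_absv4hi; expected to assemble to an
   "abs v<n>.4h, v<n>.4h" instruction.  */
int16x4_t
abs_4h (int16x4_t x)
{
  return vabs_s16 (x);
}

/* Expands via __builtin_aarch64_absv4si; expected to assemble to an
   "abs v<n>.4s, v<n>.4s" instruction.  */
int32x4_t
abs_4s (int32x4_t x)
{
  return vabsq_s32 (x);
}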