From 32e69596af762503c636cdec90567c8d4e30629d Mon Sep 17 00:00:00 2001
From: mshawcroft
Date: Wed, 9 Oct 2013 12:09:11 +0000
Subject: [PATCH] [AArch64] Implement vclz ADVSimd intrinsic.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@203314 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                |  10 +
 gcc/config/aarch64/aarch64-simd-builtins.def |   2 +-
 gcc/config/aarch64/aarch64.h                 |   2 +-
 gcc/config/aarch64/arm_neon.h                | 206 ++++------
 gcc/testsuite/ChangeLog                      |   4 +
 gcc/testsuite/gcc.target/aarch64/vclz.c      | 574 +++++++++++++++++++++++++++
 6 files changed, 664 insertions(+), 134 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vclz.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4248751cdbf..d04904c4408 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
 2013-10-09  Alex Velenko
 
+	* config/aarch64/arm_neon.h (vclz_s8, vclz_s16, vclz_s32)
+	(vclzq_s8, vclzq_s16, vclzq_s32, vclz_u8, vclz_u16, vclz_u32)
+	(vclzq_u8, vclzq_u16, vclzq_u32): Replace ASM with C.
+	* config/aarch64/aarch64.h
+	(CLZ_DEFINED_VALUE_AT_ZERO): Macro fixed for clz.
+	* config/aarch64/aarch64-simd-builtins.def
+	(VAR1 (UNOP, clz, 0, v4si)): Replaced with iterator.
+
+2013-10-09  Alex Velenko
+
 	* config/aarch64/arm_neon.h (vadd_f64, vsub_f64): Implementation
 	added.
 
 2013-10-09  Alex Velenko
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 35897f39395..c18b150a1f5 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -45,7 +45,7 @@
   BUILTIN_VDQF (UNOP, sqrt, 2)
   BUILTIN_VD_BHSI (BINOP, addp, 0)
   VAR1 (UNOP, addp, 0, di)
-  VAR1 (UNOP, clz, 2, v4si)
+  BUILTIN_VDQ_BHSI (UNOP, clz, 2)
 
   BUILTIN_VALL (GETLANE, get_lane, 0)
   VAR1 (GETLANE, get_lane, 0, di)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index da2b46d14cf..7a80e96385f 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -739,7 +739,7 @@ do {					\
    : reverse_condition (CODE))
 
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = ((MODE) == SImode ? 32 : 64), 2)
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = ((MODE) == SImode ? 32 : 64), 2)
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index db9bf28227e..482d7d03ed4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -5158,138 +5158,6 @@ vclsq_s32 (int32x4_t a)
   return result;
 }
 
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vclz_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("clz %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vclz_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("clz %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vclz_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("clz %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vclz_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("clz %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vclz_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("clz %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vclz_u32 (uint32x2_t a)
-{
-  uint32x2_t result;
-  __asm__ ("clz %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vclzq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("clz %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vclzq_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("clz %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vclzq_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("clz %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vclzq_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("clz %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vclzq_u16 (uint16x8_t a)
-{
-  uint16x8_t result;
-  __asm__ ("clz %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vclzq_u32 (uint32x4_t a)
-{
-  uint32x4_t result;
-  __asm__ ("clz %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vcnt_p8 (poly8x8_t a)
 {
@@ -17934,6 +17802,80 @@ vcltzd_f64 (float64_t __a)
   return __a < 0.0 ? -1ll : 0ll;
 }
 
+/* vclz.  */
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vclz_s8 (int8x8_t __a)
+{
+  return __builtin_aarch64_clzv8qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vclz_s16 (int16x4_t __a)
+{
+  return __builtin_aarch64_clzv4hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vclz_s32 (int32x2_t __a)
+{
+  return __builtin_aarch64_clzv2si (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclz_u8 (uint8x8_t __a)
+{
+  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclz_u16 (uint16x4_t __a)
+{
+  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclz_u32 (uint32x2_t __a)
+{
+  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vclzq_s8 (int8x16_t __a)
+{
+  return __builtin_aarch64_clzv16qi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vclzq_s16 (int16x8_t __a)
+{
+  return __builtin_aarch64_clzv8hi (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vclzq_s32 (int32x4_t __a)
+{
+  return __builtin_aarch64_clzv4si (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vclzq_u8 (uint8x16_t __a)
+{
+  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vclzq_u16 (uint16x8_t __a)
+{
+  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vclzq_u32 (uint32x4_t __a)
+{
+  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
+}
+
 /* vcvt (double -> float).  */
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f15911bd280..0ece8c06510 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
 2013-10-09  Alex Velenko
 
+	* gcc.target/aarch64/vclz.c: New testcase.
+
+2013-10-09  Alex Velenko
+
 	* gcc.target/aarch64/vadd_f64.c: New testcase.
 	* gcc.target/aarch64/vsub_f64.c: New testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/vclz.c b/gcc/testsuite/gcc.target/aarch64/vclz.c
new file mode 100644
index 00000000000..006f80d77b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vclz.c
@@ -0,0 +1,574 @@
+/* Test vclz works correctly.  */
+/* { dg-do run } */
+/* { dg-options "-std=gnu99 -O3 -Wno-div-by-zero --save-temps" } */
+#include <arm_neon.h>
+
+extern void abort (void);
+
+/* Tests in binary should look like:
+   0
+   1
+   10
+   101
+   1010
+   10101
+   etc.  */
+
+#define TEST0 0
+#define TEST1 0x1
+#define TEST2 0x2
+#define TEST3 0x5
+#define TEST4 0xa
+#define TEST5 0x15
+#define TEST6 0x2a
+#define TEST7 0x55
+#define TEST8 0xaa
+#define TEST9 0x155
+#define TEST10 0x2aa
+#define TEST11 0x555
+#define TEST12 0xaaa
+#define TEST13 0x1555
+#define TEST14 0x2aaa
+#define TEST15 0x5555
+#define TEST16 0xaaaa
+#define TEST17 0x15555
+#define TEST18 0x2aaaa
+#define TEST19 0x55555
+#define TEST20 0xaaaaa
+#define TEST21 0x155555
+#define TEST22 0x2aaaaa
+#define TEST23 0x555555
+#define TEST24 0xaaaaaa
+#define TEST25 0x1555555
+#define TEST26 0x2aaaaaa
+#define TEST27 0x5555555
+#define TEST28 0xaaaaaaa
+#define TEST29 0x15555555
+#define TEST30 0x2aaaaaaa
+#define TEST31 0x55555555
+#define TEST32 0xaaaaaaaa
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define CONCAT(a, b) a##b
+#define CONCAT1(a, b) CONCAT (a, b)
+#define REG_INFEX64 _
+#define REG_INFEX128 q_
+#define SIGNED0 u
+#define SIGNED1 s
+#define SIGNED(x) SIGNED##x
+#define REG_INFEX(reg_len) REG_INFEX##reg_len
+#define POSTFIX(reg_len, data_len, is_signed) \
+  CONCAT1 (REG_INFEX (reg_len), CONCAT1 (SIGNED (is_signed), data_len))
+#define DATA_TYPE(data_len) DATA_TYPE_##data_len
+#define LOAD_INST(reg_len, data_len, is_signed) \
+  CONCAT1 (vld1, POSTFIX (reg_len, data_len, is_signed))
+#define CLZ_INST(reg_len, data_len, is_signed) \
+  CONCAT1 (vclz, POSTFIX (reg_len, data_len, is_signed))
+
+#define RUN_TEST(test_set, answ_set, reg_len, data_len, is_signed, n) \
+  a = LOAD_INST (reg_len, data_len, is_signed) (test_set); \
+  b = LOAD_INST (reg_len, data_len, is_signed) (answ_set); \
+  INHIB_OPTIMIZATION; \
+  a = CLZ_INST (reg_len, data_len, is_signed) (a); \
+  for (i = 0; i < n; i++) \
+    { \
+      INHIB_OPTIMIZATION; \
+      if (a [i] != b [i]) \
+	{ \
+	  return 1; \
+	} \
+    }
+
+int
+test_vclz_s8 ()
+{
+  int i;
+  int8x8_t a;
+  int8x8_t b;
+
+  int8_t test_set0[8] = {
+    TEST0, TEST1, TEST2, TEST3,
+    TEST4, TEST5, TEST6, TEST7
+  };
+  int8_t test_set1[8] = {
+    TEST8, TEST8, TEST8, TEST8,
+    TEST8, TEST8, TEST8, TEST8
+  };
+  int8_t answ_set0[8] = {
+    8, 7, 6, 5,
+    4, 3, 2, 1
+  };
+  int8_t answ_set1[8] = {
+    0, 0, 0, 0,
+    0, 0, 0, 0
+  };
+  RUN_TEST (test_set0, answ_set0, 64, 8, 1, 8);
+  RUN_TEST (test_set1, answ_set1, 64, 8, 1, 1);
+
+  return 0;
+}
+
+/* Double scan-assembler-times to take account of unsigned functions.  */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 4 } } */
+
+int
+test_vclz_s16 ()
+{
+  int i;
+  int16x4_t a;
+  int16x4_t b;
+
+  int16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+  int16_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
+  int16_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
+  int16_t test_set3[4] = { TEST12, TEST13, TEST14, TEST15 };
+  int16_t test_set4[4] = { TEST16, TEST16, TEST16, TEST16 };
+
+  int16_t answ_set0[4] = { 16, 15, 14, 13 };
+  int16_t answ_set1[4] = { 12, 11, 10, 9 };
+  int16_t answ_set2[4] = { 8, 7, 6, 5 };
+  int16_t answ_set3[4] = { 4, 3, 2, 1 };
+  int16_t answ_set4[4] = { 0, 0, 0, 0 };
+
+  RUN_TEST (test_set0, answ_set0, 64, 16, 1, 4);
+  RUN_TEST (test_set1, answ_set1, 64, 16, 1, 4);
+  RUN_TEST (test_set2, answ_set2, 64, 16, 1, 4);
+  RUN_TEST (test_set3, answ_set3, 64, 16, 1, 4);
+  RUN_TEST (test_set4, answ_set4, 64, 16, 1, 1);
+
+  return 0;
+}
+
+/* Double scan-assembler-times to take account of unsigned functions.  */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 10} } */
+
+int
+test_vclz_s32 ()
+{
+  int i;
+  int32x2_t a;
+  int32x2_t b;
+
+  int32_t test_set0[2] = { TEST0, TEST1 };
+  int32_t test_set1[2] = { TEST2, TEST3 };
+  int32_t test_set2[2] = { TEST4, TEST5 };
+  int32_t test_set3[2] = { TEST6, TEST7 };
+  int32_t test_set4[2] = { TEST8, TEST9 };
+  int32_t test_set5[2] = { TEST10, TEST11 };
+  int32_t test_set6[2] = { TEST12, TEST13 };
+  int32_t test_set7[2] = { TEST14, TEST15 };
+  int32_t test_set8[2] = { TEST16, TEST17 };
+  int32_t test_set9[2] = { TEST18, TEST19 };
+  int32_t test_set10[2] = { TEST20, TEST21 };
+  int32_t test_set11[2] = { TEST22, TEST23 };
+  int32_t test_set12[2] = { TEST24, TEST25 };
+  int32_t test_set13[2] = { TEST26, TEST27 };
+  int32_t test_set14[2] = { TEST28, TEST29 };
+  int32_t test_set15[2] = { TEST30, TEST31 };
+  int32_t test_set16[2] = { TEST32, TEST32 };
+
+  int32_t answ_set0[2] = { 32, 31 };
+  int32_t answ_set1[2] = { 30, 29 };
+  int32_t answ_set2[2] = { 28, 27 };
+  int32_t answ_set3[2] = { 26, 25 };
+  int32_t answ_set4[2] = { 24, 23 };
+  int32_t answ_set5[2] = { 22, 21 };
+  int32_t answ_set6[2] = { 20, 19 };
+  int32_t answ_set7[2] = { 18, 17 };
+  int32_t answ_set8[2] = { 16, 15 };
+  int32_t answ_set9[2] = { 14, 13 };
+  int32_t answ_set10[2] = { 12, 11 };
+  int32_t answ_set11[2] = { 10, 9 };
+  int32_t answ_set12[2] = { 8, 7 };
+  int32_t answ_set13[2] = { 6, 5 };
+  int32_t answ_set14[2] = { 4, 3 };
+  int32_t answ_set15[2] = { 2, 1 };
+  int32_t answ_set16[2] = { 0, 0 };
+
+  RUN_TEST (test_set0, answ_set0, 64, 32, 1, 2);
+  RUN_TEST (test_set1, answ_set1, 64, 32, 1, 2);
+  RUN_TEST (test_set2, answ_set2, 64, 32, 1, 2);
+  RUN_TEST (test_set3, answ_set3, 64, 32, 1, 2);
+  RUN_TEST (test_set4, answ_set4, 64, 32, 1, 2);
+  RUN_TEST (test_set5, answ_set5, 64, 32, 1, 2);
+  RUN_TEST (test_set6, answ_set6, 64, 32, 1, 2);
+  RUN_TEST (test_set7, answ_set7, 64, 32, 1, 2);
+  RUN_TEST (test_set8, answ_set8, 64, 32, 1, 2);
+  RUN_TEST (test_set9, answ_set9, 64, 32, 1, 2);
+  RUN_TEST (test_set10, answ_set10, 64, 32, 1, 2);
+  RUN_TEST (test_set11, answ_set11, 64, 32, 1, 2);
+  RUN_TEST (test_set12, answ_set12, 64, 32, 1, 2);
+  RUN_TEST (test_set13, answ_set13, 64, 32, 1, 2);
+  RUN_TEST (test_set14, answ_set14, 64, 32, 1, 2);
+  RUN_TEST (test_set15, answ_set15, 64, 32, 1, 2);
+  RUN_TEST (test_set16, answ_set16, 64, 32, 1, 1);
+
+  return 0;
+}
+
+/* Double scan-assembler-times to take account of unsigned functions.  */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 34 } } */
+
+int
+test_vclzq_s8 ()
+{
+  int i;
+  int8x16_t a;
+  int8x16_t b;
+
+  int8_t test_set0[16] = {
+    TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7,
+    TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8
+  };
+  int8_t answ_set0[16] = {
+    8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0
+  };
+  RUN_TEST (test_set0, answ_set0, 128, 8, 1, 9);
+  return 0;
+}
+
+/* Double scan-assembler-times to take account of unsigned functions.  */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 2 } } */
+
+int
+test_vclzq_s16 ()
+{
+  int i;
+  int16x8_t a;
+  int16x8_t b;
+
+  int16_t test_set0[8] = {
+    TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7
+  };
+  int16_t test_set1[8] = {
+    TEST8, TEST9, TEST10, TEST11, TEST12, TEST13, TEST14, TEST15
+  };
+  int16_t test_set2[8] = {
+    TEST16, TEST16, TEST16, TEST16, TEST16, TEST16, TEST16, TEST16
+  };
+
+  int16_t answ_set0[8] = {
+    16, 15, 14, 13, 12, 11, 10, 9
+  };
+  int16_t answ_set1[8] = {
+    8, 7, 6, 5, 4, 3, 2, 1
+  };
+  int16_t answ_set2[8] = {
+    0, 0, 0, 0, 0, 0, 0, 0
+  };
+  RUN_TEST (test_set0, answ_set0, 128, 16, 1, 8);
+  RUN_TEST (test_set1, answ_set1, 128, 16, 1, 8);
+  RUN_TEST (test_set2, answ_set2, 128, 16, 1, 1);
+
+  return 0;
+}
+
+/* Double scan-assembler-times to take account of unsigned functions.  */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 6 } } */
+
+int
+test_vclzq_s32 ()
+{
+  int i;
+  int32x4_t a;
+  int32x4_t b;
+
+  int32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+  int32_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
+  int32_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
+  int32_t test_set3[4] = { TEST12, TEST13, TEST14, TEST15 };
+  int32_t test_set4[4] = { TEST16, TEST17, TEST18, TEST19 };
+  int32_t test_set5[4] = { TEST20, TEST21, TEST22, TEST23 };
+  int32_t test_set6[4] = { TEST24, TEST25, TEST26, TEST27 };
+  int32_t test_set7[4] = { TEST28, TEST29, TEST30, TEST31 };
+  int32_t test_set8[4] = { TEST32, TEST32, TEST32, TEST32 };
+
+  int32_t answ_set0[4] = { 32, 31, 30, 29 };
+  int32_t answ_set1[4] = { 28, 27, 26, 25 };
+  int32_t answ_set2[4] = { 24, 23, 22, 21 };
+  int32_t answ_set3[4] = { 20, 19, 18, 17 };
+  int32_t answ_set4[4] = { 16, 15, 14, 13 };
+  int32_t answ_set5[4] = { 12, 11, 10, 9 };
+  int32_t answ_set6[4] = { 8, 7, 6, 5 };
+  int32_t answ_set7[4] = { 4, 3, 2, 1 };
+  int32_t answ_set8[4] = { 0, 0, 0, 0 };
+
+  RUN_TEST (test_set0, answ_set0, 128, 32, 1, 4);
+  RUN_TEST (test_set1, answ_set1, 128, 32, 1, 4);
+  RUN_TEST (test_set2, answ_set2, 128, 32, 1, 4);
+  RUN_TEST (test_set3, answ_set3, 128, 32, 1, 4);
+  RUN_TEST (test_set4, answ_set4, 128, 32, 1, 1);
+
+  return 0;
+}
+
+/* Double scan-assembler-times to take account of unsigned functions.  */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 10 } } */
+
+/* Unsigned versions.  */
+
+int
+test_vclz_u8 ()
+{
+  int i;
+  uint8x8_t a;
+  uint8x8_t b;
+
+  uint8_t test_set0[8] = {
+    TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7
+  };
+  uint8_t test_set1[8] = {
+    TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8
+  };
+  uint8_t answ_set0[8] = {
+    8, 7, 6, 5, 4, 3, 2, 1
+  };
+  uint8_t answ_set1[8] = {
+    0, 0, 0, 0, 0, 0, 0, 0
+  };
+
+  RUN_TEST (test_set0, answ_set0, 64, 8, 0, 8);
+  RUN_TEST (test_set1, answ_set1, 64, 8, 0, 1);
+
+  return 0;
+}
+
+/* ASM scan near test for signed version.  */
+
+int
+test_vclz_u16 ()
+{
+  int i;
+  uint16x4_t a;
+  uint16x4_t b;
+
+  uint16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+  uint16_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
+  uint16_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
+  uint16_t test_set3[4] = { TEST12, TEST13, TEST14, TEST15 };
+  uint16_t test_set4[4] = { TEST16, TEST16, TEST16, TEST16 };
+
+  uint16_t answ_set0[4] = { 16, 15, 14, 13 };
+  uint16_t answ_set1[4] = { 12, 11, 10, 9 };
+  uint16_t answ_set2[4] = { 8, 7, 6, 5 };
+  uint16_t answ_set3[4] = { 4, 3, 2, 1 };
+  uint16_t answ_set4[4] = { 0, 0, 0, 0 };
+
+  RUN_TEST (test_set0, answ_set0, 64, 16, 0, 4);
+  RUN_TEST (test_set1, answ_set1, 64, 16, 0, 4);
+  RUN_TEST (test_set2, answ_set2, 64, 16, 0, 4);
+  RUN_TEST (test_set3, answ_set3, 64, 16, 0, 4);
+  RUN_TEST (test_set4, answ_set4, 64, 16, 0, 1);
+
+  return 0;
+}
+
+/* ASM scan near test for signed version.  */
+
+int
+test_vclz_u32 ()
+{
+  int i;
+  uint32x2_t a;
+  uint32x2_t b;
+
+  uint32_t test_set0[2] = { TEST0, TEST1 };
+  uint32_t test_set1[2] = { TEST2, TEST3 };
+  uint32_t test_set2[2] = { TEST4, TEST5 };
+  uint32_t test_set3[2] = { TEST6, TEST7 };
+  uint32_t test_set4[2] = { TEST8, TEST9 };
+  uint32_t test_set5[2] = { TEST10, TEST11 };
+  uint32_t test_set6[2] = { TEST12, TEST13 };
+  uint32_t test_set7[2] = { TEST14, TEST15 };
+  uint32_t test_set8[2] = { TEST16, TEST17 };
+  uint32_t test_set9[2] = { TEST18, TEST19 };
+  uint32_t test_set10[2] = { TEST20, TEST21 };
+  uint32_t test_set11[2] = { TEST22, TEST23 };
+  uint32_t test_set12[2] = { TEST24, TEST25 };
+  uint32_t test_set13[2] = { TEST26, TEST27 };
+  uint32_t test_set14[2] = { TEST28, TEST29 };
+  uint32_t test_set15[2] = { TEST30, TEST31 };
+  uint32_t test_set16[2] = { TEST32, TEST32 };
+
+  uint32_t answ_set0[2] = { 32, 31 };
+  uint32_t answ_set1[2] = { 30, 29 };
+  uint32_t answ_set2[2] = { 28, 27 };
+  uint32_t answ_set3[2] = { 26, 25 };
+  uint32_t answ_set4[2] = { 24, 23 };
+  uint32_t answ_set5[2] = { 22, 21 };
+  uint32_t answ_set6[2] = { 20, 19 };
+  uint32_t answ_set7[2] = { 18, 17 };
+  uint32_t answ_set8[2] = { 16, 15 };
+  uint32_t answ_set9[2] = { 14, 13 };
+  uint32_t answ_set10[2] = { 12, 11 };
+  uint32_t answ_set11[2] = { 10, 9 };
+  uint32_t answ_set12[2] = { 8, 7 };
+  uint32_t answ_set13[2] = { 6, 5 };
+  uint32_t answ_set14[2] = { 4, 3 };
+  uint32_t answ_set15[2] = { 2, 1 };
+  uint32_t answ_set16[2] = { 0, 0 };
+
+  RUN_TEST (test_set0, answ_set0, 64, 32, 0, 2);
+  RUN_TEST (test_set1, answ_set1, 64, 32, 0, 2);
+  RUN_TEST (test_set2, answ_set2, 64, 32, 0, 2);
+  RUN_TEST (test_set3, answ_set3, 64, 32, 0, 2);
+  RUN_TEST (test_set4, answ_set4, 64, 32, 0, 2);
+  RUN_TEST (test_set5, answ_set5, 64, 32, 0, 2);
+  RUN_TEST (test_set6, answ_set6, 64, 32, 0, 2);
+  RUN_TEST (test_set7, answ_set7, 64, 32, 0, 2);
+  RUN_TEST (test_set8, answ_set8, 64, 32, 0, 2);
+  RUN_TEST (test_set9, answ_set9, 64, 32, 0, 2);
+  RUN_TEST (test_set10, answ_set10, 64, 32, 0, 2);
+  RUN_TEST (test_set11, answ_set11, 64, 32, 0, 2);
+  RUN_TEST (test_set12, answ_set12, 64, 32, 0, 2);
+  RUN_TEST (test_set13, answ_set13, 64, 32, 0, 2);
+  RUN_TEST (test_set14, answ_set14, 64, 32, 0, 2);
+  RUN_TEST (test_set15, answ_set15, 64, 32, 0, 2);
+  RUN_TEST (test_set16, answ_set16, 64, 32, 0, 1);
+
+  return 0;
+}
+
+/* ASM scan near test for signed version.  */
+
+int
+test_vclzq_u8 ()
+{
+  int i;
+  uint8x16_t a;
+  uint8x16_t b;
+
+  uint8_t test_set0[16] = {
+    TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7,
+    TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8
+  };
+  uint8_t answ_set0[16] = {
+    8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0
+  };
+  RUN_TEST (test_set0, answ_set0, 128, 8, 0, 9);
+
+  return 0;
+}
+
+/* ASM scan near test for signed version.  */
+
+int
+test_vclzq_u16 ()
+{
+  int i;
+  uint16x8_t a;
+  uint16x8_t b;
+
+  uint16_t test_set0[8] = {
+    TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7
+  };
+  uint16_t test_set1[8] = {
+    TEST8, TEST9, TEST10, TEST11, TEST12, TEST13, TEST14, TEST15
+  };
+  uint16_t test_set2[8] = {
+    TEST16, TEST16, TEST16, TEST16, TEST16, TEST16, TEST16, TEST16
+  };
+
+  uint16_t answ_set0[8] = {
+    16, 15, 14, 13, 12, 11, 10, 9
+  };
+
+  uint16_t answ_set1[8] = {
+    8, 7, 6, 5, 4, 3, 2, 1
+  };
+  uint16_t answ_set2[8] = {
+    0, 0, 0, 0, 0, 0, 0, 0
+  };
+
+  RUN_TEST (test_set0, answ_set0, 128, 16, 0, 8);
+  RUN_TEST (test_set1, answ_set1, 128, 16, 0, 8);
+  RUN_TEST (test_set2, answ_set2, 128, 16, 0, 1);
+
+  return 0;
+}
+
+/* ASM scan near test for signed version.  */
+
+int
+test_vclzq_u32 ()
+{
+  int i;
+  uint32x4_t a;
+  uint32x4_t b;
+
+  uint32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+  uint32_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
+  uint32_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
+  uint32_t test_set3[4] = { TEST12, TEST13, TEST14, TEST15 };
+  uint32_t test_set4[4] = { TEST16, TEST17, TEST18, TEST19 };
+  uint32_t test_set5[4] = { TEST20, TEST21, TEST22, TEST23 };
+  uint32_t test_set6[4] = { TEST24, TEST25, TEST26, TEST27 };
+  uint32_t test_set7[4] = { TEST28, TEST29, TEST30, TEST31 };
+  uint32_t test_set8[4] = { TEST32, TEST32, TEST32, TEST32 };
+
+  uint32_t answ_set0[4] = { 32, 31, 30, 29 };
+  uint32_t answ_set1[4] = { 28, 27, 26, 25 };
+  uint32_t answ_set2[4] = { 24, 23, 22, 21 };
+  uint32_t answ_set3[4] = { 20, 19, 18, 17 };
+  uint32_t answ_set4[4] = { 16, 15, 14, 13 };
+  uint32_t answ_set5[4] = { 12, 11, 10, 9 };
+  uint32_t answ_set6[4] = { 8, 7, 6, 5 };
+  uint32_t answ_set7[4] = { 4, 3, 2, 1 };
+  uint32_t answ_set8[4] = { 0, 0, 0, 0 };
+
+  RUN_TEST (test_set0, answ_set0, 128, 32, 0, 4);
+  RUN_TEST (test_set1, answ_set1, 128, 32, 0, 4);
+  RUN_TEST (test_set2, answ_set2, 128, 32, 0, 4);
+  RUN_TEST (test_set3, answ_set3, 128, 32, 0, 4);
+  RUN_TEST (test_set4, answ_set4, 128, 32, 0, 1);
+
+  return 0;
+}
+
+/* ASM scan near test for signed version.  */
+
+int
+main (int argc, char **argv)
+{
+
+  if (test_vclz_s8 ())
+    abort ();
+
+  if (test_vclz_s16 ())
+    abort ();
+
+  if (test_vclz_s32 ())
+    abort ();
+
+  if (test_vclzq_s8 ())
+    abort ();
+
+  if (test_vclzq_s16 ())
+    abort ();
+
+  if (test_vclzq_s32 ())
+    abort ();
+
+  if (test_vclz_u8 ())
+    abort ();
+
+  if (test_vclz_u16 ())
+    abort ();
+
+  if (test_vclz_u32 ())
+    abort ();
+
+  if (test_vclzq_u8 ())
+    abort ();
+
+  if (test_vclzq_u16 ())
+    abort ();
+
+  if (test_vclzq_u32 ())
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
-- 
2.11.4.GIT
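For reference, a minimal standalone sketch (not part of the patch) of how the vclz intrinsics implemented above are used. It assumes an AArch64 toolchain whose arm_neon.h provides these intrinsics, and it mirrors the behaviour the vclz.c test expects: each lane receives its leading-zero count, and an all-zero lane yields the lane width in bits. The file name vclz_demo.c below is only illustrative.

/* vclz_demo.c: exercise the 64-bit and 128-bit vclz forms.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* Lanes 0x00, 0x01, 0x2a, 0xaa have 8, 7, 2 and 0 leading zero bits.  */
  uint8_t in[8] = { 0x00, 0x01, 0x2a, 0xaa, 0x00, 0x01, 0x2a, 0xaa };
  uint8_t out[8];

  uint8x8_t v = vld1_u8 (in);       /* Load eight 8-bit lanes.  */
  uint8x8_t c = vclz_u8 (v);        /* Per-lane count leading zeros.  */
  vst1_u8 (out, c);

  for (int i = 0; i < 8; i++)
    printf ("clz (0x%02x) = %d\n", in[i], out[i]);

  /* 128-bit form: a zero 32-bit lane counts all 32 bits, matching the
     answ_set entries of 32 in the vclz.c test above.  */
  uint32x4_t w = vdupq_n_u32 (0);
  uint32x4_t cw = vclzq_u32 (w);
  printf ("vclzq_u32 (0), lane 0 = %u\n", vgetq_lane_u32 (cw, 0));

  return 0;
}

Built with optimisation on an AArch64 target (for example gcc -O2 vclz_demo.c), the two intrinsic calls should each map to a single vector clz instruction of the kind the scan-assembler directives in vclz.c look for.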