Add support for conditional reductions using SVE CLASTB
[official-gcc.git] / gcc / testsuite / gcc.target / aarch64 / sve / vcond_4.c
blob8d137012d9fc111b6647e8e6d09af81bf2c1ca20
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
/* Comparison predicates intended to be passed as the CMP argument of
   TEST_LOOP.  Each one takes two operands A and B and expands to an
   expression that is nonzero when the comparison holds.  */

/* Comparisons written with the ordinary C equality and relational
   operators.  */
#define eq(A, B) ((A) == (B))
#define ne(A, B) ((A) != (B))
#define olt(A, B) ((A) < (B))
#define ole(A, B) ((A) <= (B))
#define oge(A, B) ((A) >= (B))
#define ogt(A, B) ((A) > (B))

/* Tests based on __builtin_isunordered, which is nonzero when at least
   one operand is a NaN.  */
#define ordered(A, B) (!__builtin_isunordered (A, B))
#define unordered(A, B) (__builtin_isunordered (A, B))

/* Comparisons written with the __builtin_is* comparison built-ins.
   ueq is the negation of __builtin_islessgreater, so it also holds when
   the operands are unordered.  */
#define ueq(A, B) (!__builtin_islessgreater (A, B))
#define ult(A, B) (__builtin_isless (A, B))
#define ule(A, B) (__builtin_islessequal (A, B))
#define uge(A, B) (__builtin_isgreaterequal (A, B))
#define ugt(A, B) (__builtin_isgreater (A, B))

/* Logical negations of the five predicates above.  */
#define nueq(A, B) (__builtin_islessgreater (A, B))
#define nult(A, B) (!__builtin_isless (A, B))
#define nule(A, B) (!__builtin_islessequal (A, B))
#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
#define nugt(A, B) (!__builtin_isgreater (A, B))
/* Define three loops for result type TYPE1, comparison operand type TYPE2
   and comparison predicate CMP.  Every loop runs COUNT iterations and
   stores one TYPE1 value per element of DEST, chosen by applying CMP to
   an element of A:

   - test_TYPE1_TYPE2_CMP_var compares a[i] with b[i] and stores src[i]
     when the comparison holds, otherwise FALLBACK.
   - test_TYPE1_TYPE2_CMP_zero compares a[i] with the constant 0 and
     stores src[i] when the comparison holds, otherwise FALLBACK.
   - test_TYPE1_TYPE2_CMP_sel compares a[i] with the scalar B and stores
     IF_TRUE when the comparison holds, otherwise IF_FALSE.

   TYPE1, TYPE2 and CMP must each be a single identifier because they are
   pasted into the function names.  The functions are marked noinline and
   noclone.  The final closing brace carries no line continuation, so the
   macro definition ends with the last function.  */
#define TEST_LOOP(TYPE1, TYPE2, CMP)                               \
  void __attribute__ ((noinline, noclone))                         \
  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,      \
                                        TYPE1 *restrict src,       \
                                        TYPE1 fallback,            \
                                        TYPE2 *restrict a,         \
                                        TYPE2 *restrict b,         \
                                        int count)                 \
  {                                                                \
    for (int i = 0; i < count; ++i)                                \
      dest[i] = CMP (a[i], b[i]) ? src[i] : fallback;              \
  }                                                                \
                                                                   \
  void __attribute__ ((noinline, noclone))                         \
  test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest,     \
                                         TYPE1 *restrict src,      \
                                         TYPE1 fallback,           \
                                         TYPE2 *restrict a,        \
                                         int count)                \
  {                                                                \
    for (int i = 0; i < count; ++i)                                \
      dest[i] = CMP (a[i], 0) ? src[i] : fallback;                 \
  }                                                                \
                                                                   \
  void __attribute__ ((noinline, noclone))                         \
  test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest,      \
                                        TYPE1 if_true,             \
                                        TYPE1 if_false,            \
                                        TYPE2 *restrict a,         \
                                        TYPE2 b, int count)        \
  {                                                                \
    for (int i = 0; i < count; ++i)                                \
      dest[i] = CMP (a[i], b) ? if_true : if_false;                \
  }
/* Instantiate TEST_LOOP with comparison predicate CMP for ten type pairs:
   five result types (int32_t, uint32_t, int64_t, uint64_t and the
   floating-point type itself) with float comparison operands, then the
   corresponding five with double comparison operands.  */
#define TEST_CMP(CMP)                   \
  TEST_LOOP (int32_t, float, CMP)       \
  TEST_LOOP (uint32_t, float, CMP)      \
  TEST_LOOP (int64_t, float, CMP)       \
  TEST_LOOP (uint64_t, float, CMP)      \
  TEST_LOOP (float, float, CMP)         \
  TEST_LOOP (int32_t, double, CMP)      \
  TEST_LOOP (uint32_t, double, CMP)     \
  TEST_LOOP (int64_t, double, CMP)      \
  TEST_LOOP (uint64_t, double, CMP)     \
  TEST_LOOP (double, double, CMP)
72 TEST_CMP (eq)
73 TEST_CMP (ne)
74 TEST_CMP (olt)
75 TEST_CMP (ole)
76 TEST_CMP (oge)
77 TEST_CMP (ogt)
78 TEST_CMP (ordered)
79 TEST_CMP (unordered)
80 TEST_CMP (ueq)
81 TEST_CMP (ult)
82 TEST_CMP (ule)
83 TEST_CMP (uge)
84 TEST_CMP (ugt)
85 TEST_CMP (nueq)
86 TEST_CMP (nult)
87 TEST_CMP (nule)
88 TEST_CMP (nuge)
89 TEST_CMP (nugt)
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 5 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 10 { xfail *-*-* } } } */
/* 5 for ne, 5 for ueq and 5 for nueq.  */
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
/* 5 for olt, 5 for ult and 5 for nult.  */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* 5 for ole, 5 for ule and 5 for nule.  */
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* 5 for ogt, 5 for ugt and 5 for nugt.  */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* 5 for oge, 5 for uge and 5 for nuge.  */
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
/* 3 loops * 5 invocations for all 12 unordered comparisons.  */
/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
   for all 12 unordered comparisons.  */
/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */