[testsuite] Fix directives order
[official-gcc.git] / gcc / testsuite / gcc.target / aarch64 / vect_smlal_1.c
blobc191d2eba102b5c58c660c6f327640172531ffc6
/* { dg-do run } */
/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
/* Element types used by the tests.  S = signed, U = unsigned; the digits
   give the width in bits the type has on the AArch64 target.  The p*_t
   names are restrict-qualified pointers to the matching element type,
   used for every kernel parameter.  */
typedef signed char S8_t;
typedef signed short S16_t;
typedef signed int S32_t;
typedef signed long long S64_t;
typedef signed char *__restrict__ pS8_t;
typedef signed short *__restrict__ pS16_t;
typedef signed int *__restrict__ pS32_t;
typedef signed long long *__restrict__ pS64_t;
typedef unsigned char U8_t;
typedef unsigned short U16_t;
typedef unsigned int U32_t;
typedef unsigned long long U64_t;
typedef unsigned char *__restrict__ pU8_t;
typedef unsigned short *__restrict__ pU16_t;
typedef unsigned int *__restrict__ pU32_t;
typedef unsigned long long *__restrict__ pU64_t;
/* Declared by hand instead of including <stdlib.h>; called by the
   CHECK and NCHECK macros when a result does not match.  */
extern void abort (void);
23 void
24 test_addS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c)
26 int i;
27 for (i = 0; i < 4; i++)
28 a[i] += (S64_t) b[i] * (S64_t) c[i];
/* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.2d" } } */
34 void
35 test_addS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c)
37 int i;
38 for (i = 0; i < 8; i++)
39 a[i] += (S32_t) b[i] * (S32_t) c[i];
/* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.4s" } } */
45 void
46 test_addS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c)
48 int i;
49 for (i = 0; i < 16; i++)
50 a[i] += (S16_t) b[i] * (S16_t) c[i];
53 void
54 test_addS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c)
56 int i;
57 for (i = 0; i < 16; i++)
58 a[i] += (S16_t) -b[i] * (S16_t) -c[i];
61 void
62 test_addS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c)
64 int i;
65 for (i = 0; i < 16; i++)
66 a[i] -= (S16_t) b[i] * (S16_t) -c[i];
69 void
70 test_addS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c)
72 int i;
73 for (i = 0; i < 16; i++)
74 a[i] -= (S16_t) -b[i] * (S16_t) c[i];
/* { dg-final { scan-assembler-times "smlal\tv\[0-9\]+\.8h" 4 } } */
/* { dg-final { scan-assembler-times "smlal2\tv\[0-9\]+\.8h" 4 } } */
80 void
81 test_subS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c)
83 int i;
84 for (i = 0; i < 4; i++)
85 a[i] -= (S64_t) b[i] * (S64_t) c[i];
/* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.2d" } } */
91 void
92 test_subS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c)
94 int i;
95 for (i = 0; i < 8; i++)
96 a[i] -= (S32_t) b[i] * (S32_t) c[i];
/* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.4s" } } */
102 void
103 test_subS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c)
105 int i;
106 for (i = 0; i < 16; i++)
107 a[i] -= (S16_t) b[i] * (S16_t) c[i];
110 void
111 test_subS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c)
113 int i;
114 for (i = 0; i < 16; i++)
115 a[i] += (S16_t) -b[i] * (S16_t) c[i];
118 void
119 test_subS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c)
121 int i;
122 for (i = 0; i < 16; i++)
123 a[i] += (S16_t) b[i] * (S16_t) -c[i];
126 void
127 test_subS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c)
129 int i;
130 for (i = 0; i < 16; i++)
131 a[i] += -((S16_t) b[i] * (S16_t) c[i]);
134 void
135 test_subS16_tS8_t16_neg3 (pS16_t a, pS8_t b, pS8_t c)
137 int i;
138 for (i = 0; i < 16; i++)
139 a[i] -= (S16_t) -b[i] * (S16_t) -c[i];
/* { dg-final { scan-assembler-times "smlsl\tv\[0-9\]+\.8h" 5 } } */
/* { dg-final { scan-assembler-times "smlsl2\tv\[0-9\]+\.8h" 5 } } */
145 void
146 test_addU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c)
148 int i;
149 for (i = 0; i < 4; i++)
150 a[i] += (U64_t) b[i] * (U64_t) c[i];
/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.2d" } } */
156 void
157 test_addU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c)
159 int i;
160 for (i = 0; i < 8; i++)
161 a[i] += (U32_t) b[i] * (U32_t) c[i];
/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.4s" } } */
167 void
168 test_addU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c)
170 int i;
171 for (i = 0; i < 16; i++)
172 a[i] += (U16_t) b[i] * (U16_t) c[i];
/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.8h" } } */
/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.8h" } } */
178 void
179 test_subU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c)
181 int i;
182 for (i = 0; i < 4; i++)
183 a[i] -= (U64_t) b[i] * (U64_t) c[i];
/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.2d" } } */
189 void
190 test_subU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c)
192 int i;
193 for (i = 0; i < 8; i++)
194 a[i] -= (U32_t) b[i] * (U32_t) c[i];
/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.4s" } } */
200 void
201 test_subU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c)
203 int i;
204 for (i = 0; i < 16; i++)
205 a[i] -= (U16_t) b[i] * (U16_t) c[i];
/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.8h" } } */
/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.8h" } } */
212 S64_t add_rS64[4] = { 6, 7, -4, -3 };
213 S32_t add_rS32[8] = { 6, 7, -4, -3, 10, 11, 0, 1 };
214 S16_t add_rS16[16] =
215 { 6, 7, -4, -3, 10, 11, 0, 1, 14, 15, 4, 5, 18, 19, 8, 9 };
217 S64_t sub_rS64[4] = { 0, 1, 2, 3 };
218 S32_t sub_rS32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
219 S16_t sub_rS16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
221 U64_t add_rU64[4] = { 0x6, 0x7, 0x2fffffffc, 0x2fffffffd };
223 U32_t add_rU32[8] =
225 0x6, 0x7, 0x2fffc, 0x2fffd,
226 0xa, 0xb, 0x30000, 0x30001
229 U16_t add_rU16[16] =
231 0x6, 0x7, 0x2fc, 0x2fd, 0xa, 0xb, 0x300, 0x301,
232 0xe, 0xf, 0x304, 0x305, 0x12, 0x13, 0x308, 0x309
235 U64_t sub_rU64[4] = { 0, 1, 2, 3 };
236 U32_t sub_rU32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
237 U16_t sub_rU16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
239 S8_t neg_r[16] = { -6, -5, 8, 9, -2, -1, 12, 13, 2, 3, 16, 17, 6, 7, 20, 21 };
241 S64_t S64_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
242 S32_t S32_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
243 S32_t S32_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
245 S32_t S32_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
246 S16_t S16_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
247 S16_t S16_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
249 S16_t S16_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
250 S8_t S8_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 };
251 S8_t S8_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 };
/* CHECK (T, N, AS, US): compare the first N elements of the working
   array S<T>_ta with the expected-result array <AS>_r<US><T> and call
   abort () on the first mismatch.  T is the element width used in the
   array names, AS is `add' or `sub', US is `S' or `U'.  The macro uses a
   variable named `i' that the caller must have declared.  */
#define CHECK(T,N,AS,US)                        \
  do                                            \
    {                                           \
      for (i = 0; i < (N); i++)                 \
        if (S##T##_ta[i] != AS##_r##US##T[i])   \
          abort ();                             \
    }                                           \
  while (0)

/* CHECK against the signed (SCHECK) or unsigned (UCHECK) expected
   results.  */
#define SCHECK(T,N,AS) CHECK(T,N,AS,S)
#define UCHECK(T,N,AS) CHECK(T,N,AS,U)
/* NCHECK (RES): compare all 16 elements of S16_ta with the array RES and
   call abort () on the first mismatch.  Used after the _neg variants of
   the 16-bit kernels.  The macro uses a variable named `i' that the
   caller must have declared.  */
#define NCHECK(RES)                     \
  do                                    \
    {                                   \
      for (i = 0; i < 16; i++)          \
        if (S16_ta[i] != (RES)[i])      \
          abort ();                     \
    }                                   \
  while (0)
277 main ()
279 int i;
281 test_addS64_tS32_t4 (S64_ta, S32_tb, S32_tc);
282 SCHECK (64, 4, add);
283 test_addS32_tS16_t8 (S32_ta, S16_tb, S16_tc);
284 SCHECK (32, 8, add);
285 test_addS16_tS8_t16 (S16_ta, S8_tb, S8_tc);
286 SCHECK (16, 16, add);
287 test_subS64_tS32_t4 (S64_ta, S32_tb, S32_tc);
288 SCHECK (64, 4, sub);
289 test_subS32_tS16_t8 (S32_ta, S16_tb, S16_tc);
290 SCHECK (32, 8, sub);
291 test_subS16_tS8_t16 (S16_ta, S8_tb, S8_tc);
292 SCHECK (16, 16, sub);
294 test_addU64_tU32_t4 (S64_ta, S32_tb, S32_tc);
295 UCHECK (64, 4, add);
296 test_addU32_tU16_t8 (S32_ta, S16_tb, S16_tc);
297 UCHECK (32, 8, add);
298 test_addU16_tU8_t16 (S16_ta, S8_tb, S8_tc);
299 UCHECK (16, 16, add);
300 test_subU64_tU32_t4 (S64_ta, S32_tb, S32_tc);
301 UCHECK (64, 4, sub);
302 test_subU32_tU16_t8 (S32_ta, S16_tb, S16_tc);
303 UCHECK (32, 8, sub);
304 test_subU16_tU8_t16 (S16_ta, S8_tb, S8_tc);
305 UCHECK (16, 16, sub);
307 test_addS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc);
308 NCHECK (add_rS16);
309 test_subS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc);
310 NCHECK (sub_rS16);
311 test_addS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc);
312 NCHECK (add_rS16);
313 test_subS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc);
314 NCHECK (sub_rS16);
315 test_addS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc);
316 NCHECK (add_rS16);
317 test_subS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc);
318 NCHECK (sub_rS16);
319 test_subS16_tS8_t16_neg3 (S16_ta, S8_tb, S8_tc);
320 NCHECK (neg_r);
322 return 0;