gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c

/* Test vdup_lane intrinsics work correctly. */
/* { dg-do run } */
/* { dg-options "--save-temps -O1" } */
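/* Note: as a "dg-do run" test this file is both compiled and executed;
   --save-temps keeps the generated assembly so that the scan-assembler-times
   checks at the bottom of the file can inspect the emitted DUP instructions
   after the run. */
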
#include <arm_neon.h>

extern void abort (void);
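/* Each intrinsic under test is called through a separate noinline wrapper so
   that the lane index is a compile-time constant and every wrapper emits its
   own DUP instruction, which the scan-assembler-times directives at the end
   of the file then count.  A typical way to run just this test from a GCC
   build tree (the exact invocation depends on your setup) is:

     make check-gcc RUNTESTFLAGS="aarch64.exp=vdup_lane_1.c"  */
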
float32x2_t __attribute__ ((noinline))
wrap_vdup_lane_f32_0 (float32x2_t a)
{
  return vdup_lane_f32 (a, 0);
}

float32x2_t __attribute__ ((noinline))
wrap_vdup_lane_f32_1 (float32x2_t a)
{
  return vdup_lane_f32 (a, 1);
}

int __attribute__ ((noinline))
test_vdup_lane_f32 ()
{
  float32x2_t a;
  float32x2_t b;
  int i;
  float32_t c[2] = { 0.0, 3.14 };
  float32_t d[2];

  a = vld1_f32 (c);
  b = wrap_vdup_lane_f32_0 (a);
  vst1_f32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdup_lane_f32_1 (a);
  vst1_f32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

float32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_f32_0 (float32x2_t a)
{
  return vdupq_lane_f32 (a, 0);
}

float32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_f32_1 (float32x2_t a)
{
  return vdupq_lane_f32 (a, 1);
}

int __attribute__ ((noinline))
test_vdupq_lane_f32 ()
{
  float32x2_t a;
  float32x4_t b;
  int i;
  float32_t c[2] = { 0.0, 3.14 };
  float32_t d[4];

  a = vld1_f32 (c);
  b = wrap_vdupq_lane_f32_0 (a);
  vst1q_f32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdupq_lane_f32_1 (a);
  vst1q_f32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int8x8_t __attribute__ ((noinline))
wrap_vdup_lane_s8_0 (int8x8_t a)
{
  return vdup_lane_s8 (a, 0);
}

int8x8_t __attribute__ ((noinline))
wrap_vdup_lane_s8_1 (int8x8_t a)
{
  return vdup_lane_s8 (a, 1);
}

int __attribute__ ((noinline))
test_vdup_lane_s8 ()
{
  int8x8_t a;
  int8x8_t b;
  int i;
  /* Only the first two lanes are interesting. */
  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int8_t d[8];

  a = vld1_s8 (c);
  b = wrap_vdup_lane_s8_0 (a);
  vst1_s8 (d, b);
  for (i = 0; i < 8; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdup_lane_s8_1 (a);
  vst1_s8 (d, b);
  for (i = 0; i < 8; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int8x16_t __attribute__ ((noinline))
wrap_vdupq_lane_s8_0 (int8x8_t a)
{
  return vdupq_lane_s8 (a, 0);
}

int8x16_t __attribute__ ((noinline))
wrap_vdupq_lane_s8_1 (int8x8_t a)
{
  return vdupq_lane_s8 (a, 1);
}

int __attribute__ ((noinline))
test_vdupq_lane_s8 ()
{
  int8x8_t a;
  int8x16_t b;
  int i;
  /* Only the first two lanes are interesting. */
  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int8_t d[16];

  a = vld1_s8 (c);
  b = wrap_vdupq_lane_s8_0 (a);
  vst1q_s8 (d, b);
  for (i = 0; i < 16; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdupq_lane_s8_1 (a);
  vst1q_s8 (d, b);
  for (i = 0; i < 16; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int16x4_t __attribute__ ((noinline))
wrap_vdup_lane_s16_0 (int16x4_t a)
{
  return vdup_lane_s16 (a, 0);
}

int16x4_t __attribute__ ((noinline))
wrap_vdup_lane_s16_1 (int16x4_t a)
{
  return vdup_lane_s16 (a, 1);
}

int __attribute__ ((noinline))
test_vdup_lane_s16 ()
{
  int16x4_t a;
  int16x4_t b;
  int i;
  /* Only the first two lanes are interesting. */
  int16_t c[4] = { 0, 1, 2, 3 };
  int16_t d[4];

  a = vld1_s16 (c);
  b = wrap_vdup_lane_s16_0 (a);
  vst1_s16 (d, b);
  for (i = 0; i < 4; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdup_lane_s16_1 (a);
  vst1_s16 (d, b);
  for (i = 0; i < 4; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int16x8_t __attribute__ ((noinline))
wrap_vdupq_lane_s16_0 (int16x4_t a)
{
  return vdupq_lane_s16 (a, 0);
}

int16x8_t __attribute__ ((noinline))
wrap_vdupq_lane_s16_1 (int16x4_t a)
{
  return vdupq_lane_s16 (a, 1);
}

int __attribute__ ((noinline))
test_vdupq_lane_s16 ()
{
  int16x4_t a;
  int16x8_t b;
  int i;
  /* Only the first two lanes are interesting. */
  int16_t c[4] = { 0, 1, 2, 3 };
  int16_t d[8];

  a = vld1_s16 (c);
  b = wrap_vdupq_lane_s16_0 (a);
  vst1q_s16 (d, b);
  for (i = 0; i < 8; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdupq_lane_s16_1 (a);
  vst1q_s16 (d, b);
  for (i = 0; i < 8; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int32x2_t __attribute__ ((noinline))
wrap_vdup_lane_s32_0 (int32x2_t a)
{
  return vdup_lane_s32 (a, 0);
}

int32x2_t __attribute__ ((noinline))
wrap_vdup_lane_s32_1 (int32x2_t a)
{
  return vdup_lane_s32 (a, 1);
}

int __attribute__ ((noinline))
test_vdup_lane_s32 ()
{
  int32x2_t a;
  int32x2_t b;
  int i;
  int32_t c[2] = { 0, 1 };
  int32_t d[2];

  a = vld1_s32 (c);
  b = wrap_vdup_lane_s32_0 (a);
  vst1_s32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdup_lane_s32_1 (a);
  vst1_s32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_s32_0 (int32x2_t a)
{
  return vdupq_lane_s32 (a, 0);
}

int32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_s32_1 (int32x2_t a)
{
  return vdupq_lane_s32 (a, 1);
}

int __attribute__ ((noinline))
test_vdupq_lane_s32 ()
{
  int32x2_t a;
  int32x4_t b;
  int i;
  int32_t c[2] = { 0, 1 };
  int32_t d[4];

  a = vld1_s32 (c);
  b = wrap_vdupq_lane_s32_0 (a);
  vst1q_s32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[0] != d[i])
      return 1;

  b = wrap_vdupq_lane_s32_1 (a);
  vst1q_s32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

int64x1_t __attribute__ ((noinline))
wrap_vdup_lane_s64_0 (int64x1_t a)
{
  return vdup_lane_s64 (a, 0);
}

int64x1_t __attribute__ ((noinline))
wrap_vdup_lane_s64_1 (int64x1_t a)
{
  return vdup_lane_s64 (a, 1);
}

int __attribute__ ((noinline))
test_vdup_lane_s64 ()
{
  int64x1_t a;
  int64x1_t b;
  int64_t c[1];
  int64_t d[1];

  c[0] = 0;
  a = vld1_s64 (c);
  b = wrap_vdup_lane_s64_0 (a);
  vst1_s64 (d, b);
  if (c[0] != d[0])
    return 1;

  c[0] = 1;
  a = vld1_s64 (c);
  b = wrap_vdup_lane_s64_1 (a);
  vst1_s64 (d, b);
  if (c[0] != d[0])
    return 1;
  return 0;
}

int64x2_t __attribute__ ((noinline))
wrap_vdupq_lane_s64_0 (int64x1_t a)
{
  return vdupq_lane_s64 (a, 0);
}

int64x2_t __attribute__ ((noinline))
wrap_vdupq_lane_s64_1 (int64x1_t a)
{
  return vdupq_lane_s64 (a, 1);
}

int __attribute__ ((noinline))
test_vdupq_lane_s64 ()
{
  int64x1_t a;
  int64x2_t b;
  int i;
  int64_t c[1];
  int64_t d[2];

  c[0] = 0;
  a = vld1_s64 (c);
  b = wrap_vdupq_lane_s64_0 (a);
  vst1q_s64 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;

  c[0] = 1;
  a = vld1_s64 (c);
  b = wrap_vdupq_lane_s64_1 (a);
  vst1q_s64 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;
  return 0;
}

int
main ()
{
  if (test_vdup_lane_f32 ())
    abort ();
  if (test_vdup_lane_s8 ())
    abort ();
  if (test_vdup_lane_s16 ())
    abort ();
  if (test_vdup_lane_s32 ())
    abort ();
  if (test_vdup_lane_s64 ())
    abort ();
  if (test_vdupq_lane_f32 ())
    abort ();
  if (test_vdupq_lane_s8 ())
    abort ();
  if (test_vdupq_lane_s16 ())
    abort ();
  if (test_vdupq_lane_s32 ())
    abort ();
  if (test_vdupq_lane_s64 ())
    abort ();

  return 0;
}

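/* Each DUP pattern below is expected once per wrapper that can produce it;
   the 2s and 4s forms are expected twice because the f32 and s32 wrappers
   emit the same instruction. */
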
/* Asm check for test_vdup_lane_s8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */

/* Asm check for test_vdupq_lane_s8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */

/* Asm check for test_vdup_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */

/* Asm check for test_vdupq_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */

/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */

/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */

/* { dg-final { cleanup-saved-temps } } */