Use gather loads for strided accesses
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / avx512vl-vbroadcast-3.c
blob7233398cd64edd796825a42ea68dfe3bb3d6330a
1 /* { dg-do compile { target { ! ia32 } } } */
2 /* { dg-options "-O2 -mavx512vl -masm=att" } */
/* GCC vector-extension types used throughout this test.  V1 and V2 are
   16-byte and 32-byte vectors of float (the data being shuffled); V4 and
   V5 are 16-byte and 32-byte vectors of int, used in this file as the
   index (mask) operand of __builtin_shuffle.  */
4 typedef float V1 __attribute__((vector_size (16)));
5 typedef float V2 __attribute__((vector_size (32)));
6 typedef int V4 __attribute__((vector_size (16)));
7 typedef int V5 __attribute__((vector_size (32)));
/* f1: copy the by-value argument X into a local register variable bound
   to "xmm16", then overwrite it with a shuffle of itself whose index
   vector is all 0, so every result element selects element 0.
   Each empty asm takes A as a read-write "+v" operand and emits no
   text; presumably it pins the value in the named register on both
   sides of the shuffle -- confirm against GCC's extended-asm docs.  */
9 void
10 f1 (V1 x)
12 register V1 a __asm ("xmm16");
13 a = x;
14 asm volatile ("" : "+v" (a));
15 a = __builtin_shuffle (a, (V4) { 0, 0, 0, 0 });
16 asm volatile ("" : "+v" (a));
/* f2: same shape as a register-to-register broadcast test: X is copied
   into a local bound to "xmm16" and then shuffled with an all-1 index
   vector, so every result element selects element 1.
   The empty "+v" asm statements before and after the shuffle emit no
   text; presumably they force A to be live in the named register.  */
19 void
20 f2 (V1 x)
22 register V1 a __asm ("xmm16");
23 a = x;
24 asm volatile ("" : "+v" (a));
25 a = __builtin_shuffle (a, (V4) { 1, 1, 1, 1 });
26 asm volatile ("" : "+v" (a));
/* f3: copy X into a local bound to "xmm16", then shuffle it in place
   with an all-2 index vector, so every result element selects
   element 2.  The empty "+v" asm statements bracket the shuffle and
   emit no text (presumably register-pinning barriers; confirm).  */
29 void
30 f3 (V1 x)
32 register V1 a __asm ("xmm16");
33 a = x;
34 asm volatile ("" : "+v" (a));
35 a = __builtin_shuffle (a, (V4) { 2, 2, 2, 2 });
36 asm volatile ("" : "+v" (a));
/* f4: copy X into a local bound to "xmm16", then shuffle it in place
   with an all-3 index vector, so every result element selects
   element 3 (the last element of the 4-entry index vector's range).
   The empty "+v" asm statements bracket the shuffle and emit no
   text (presumably register-pinning barriers; confirm).  */
39 void
40 f4 (V1 x)
42 register V1 a __asm ("xmm16");
43 a = x;
44 asm volatile ("" : "+v" (a));
45 a = __builtin_shuffle (a, (V4) { 3, 3, 3, 3 });
46 asm volatile ("" : "+v" (a));
/* f5: memory-source variant.  The shuffle operand is *X, read through
   the pointer argument, and the all-0 index vector makes every result
   element select element 0.  The result is assigned to a local bound
   to "xmm16"; the single trailing empty "+v" asm takes A as a
   read-write operand (presumably so the result counts as used).  */
49 void
50 f5 (V1 *x)
52 register V1 a __asm ("xmm16");
53 a = __builtin_shuffle (*x, (V4) { 0, 0, 0, 0 });
54 asm volatile ("" : "+v" (a));
/* f6: memory-source variant selecting element 1.  *X is shuffled with
   an all-1 index vector and the result is stored in a local bound to
   "xmm16"; the trailing empty "+v" asm takes A as a read-write
   operand and emits no text.  */
57 void
58 f6 (V1 *x)
60 register V1 a __asm ("xmm16");
61 a = __builtin_shuffle (*x, (V4) { 1, 1, 1, 1 });
62 asm volatile ("" : "+v" (a));
/* f7: memory-source variant selecting element 2.  *X is shuffled with
   an all-2 index vector and the result is stored in a local bound to
   "xmm16"; the trailing empty "+v" asm takes A as a read-write
   operand and emits no text.  */
65 void
66 f7 (V1 *x)
68 register V1 a __asm ("xmm16");
69 a = __builtin_shuffle (*x, (V4) { 2, 2, 2, 2 });
70 asm volatile ("" : "+v" (a));
/* f8: memory-source variant selecting element 3.  *X is shuffled with
   an all-3 index vector and the result is stored in a local bound to
   "xmm16"; the trailing empty "+v" asm takes A as a read-write
   operand and emits no text.  */
73 void
74 f8 (V1 *x)
76 register V1 a __asm ("xmm16");
77 a = __builtin_shuffle (*x, (V4) { 3, 3, 3, 3 });
78 asm volatile ("" : "+v" (a));
/* f9: 32-byte counterpart of the register-source tests.  X is copied
   into a V2 local, then shuffled in place with an 8-entry all-0 index
   vector, so every result element selects element 0.
   NOTE(review): the local is 32 bytes wide but its asm name is
   "xmm16", while the dg-final scans in this file look for %ymm16;
   presumably GCC accepts the xmm name for the full-width register --
   confirm.  The empty "+v" asm statements emit no text.  */
81 void
82 f9 (V2 x)
84 register V2 a __asm ("xmm16");
85 a = x;
86 asm volatile ("" : "+v" (a));
87 a = __builtin_shuffle (a, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
88 asm volatile ("" : "+v" (a));
/* f10: 32-byte register-source test selecting element 1.  X is copied
   into a V2 local bound to "xmm16" and shuffled in place with an
   8-entry all-1 index vector.  The empty "+v" asm statements bracket
   the shuffle and emit no text (presumably register-pinning; confirm).  */
91 void
92 f10 (V2 x)
94 register V2 a __asm ("xmm16");
95 a = x;
96 asm volatile ("" : "+v" (a));
97 a = __builtin_shuffle (a, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
98 asm volatile ("" : "+v" (a));
/* f11: 32-byte register-source test selecting element 4, i.e. the
   first element of the upper four in the 8-element vector.  X is
   copied into a V2 local bound to "xmm16" and shuffled in place with
   an 8-entry all-4 index vector.  The empty "+v" asm statements
   bracket the shuffle and emit no text.  */
101 void
102 f11 (V2 x)
104 register V2 a __asm ("xmm16");
105 a = x;
106 asm volatile ("" : "+v" (a));
107 a = __builtin_shuffle (a, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
108 asm volatile ("" : "+v" (a));
/* f12: 32-byte register-source test selecting element 5, i.e. the
   second element of the upper four in the 8-element vector.  X is
   copied into a V2 local bound to "xmm16" and shuffled in place with
   an 8-entry all-5 index vector.  The empty "+v" asm statements
   bracket the shuffle and emit no text.  */
111 void
112 f12 (V2 x)
114 register V2 a __asm ("xmm16");
115 a = x;
116 asm volatile ("" : "+v" (a));
117 a = __builtin_shuffle (a, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
118 asm volatile ("" : "+v" (a));
/* f13: 32-byte memory-source variant.  *X is shuffled with an 8-entry
   all-0 index vector (every result element selects element 0) and the
   result is stored in a V2 local bound to "xmm16".  The trailing
   empty "+v" asm takes A as a read-write operand and emits no text.  */
121 void
122 f13 (V2 *x)
124 register V2 a __asm ("xmm16");
125 a = __builtin_shuffle (*x, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
126 asm volatile ("" : "+v" (a));
/* f14: 32-byte memory-source variant selecting element 1.  *X is
   shuffled with an 8-entry all-1 index vector and the result is
   stored in a V2 local bound to "xmm16".  The trailing empty "+v"
   asm takes A as a read-write operand and emits no text.  */
129 void
130 f14 (V2 *x)
132 register V2 a __asm ("xmm16");
133 a = __builtin_shuffle (*x, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
134 asm volatile ("" : "+v" (a));
/* f15: 32-byte memory-source variant selecting element 4 (the first
   element of the upper four).  *X is shuffled with an 8-entry all-4
   index vector and the result is stored in a V2 local bound to
   "xmm16".  The trailing empty "+v" asm takes A as a read-write
   operand and emits no text.  */
137 void
138 f15 (V2 *x)
140 register V2 a __asm ("xmm16");
141 a = __builtin_shuffle (*x, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
142 asm volatile ("" : "+v" (a));
/* f16: 32-byte memory-source variant selecting element 5 (the second
   element of the upper four).  *X is shuffled with an 8-entry all-5
   index vector and the result is stored in a V2 local bound to
   "xmm16".  The trailing empty "+v" asm takes A as a read-write
   operand and emits no text.  */
145 void
146 f16 (V2 *x)
148 register V2 a __asm ("xmm16");
149 a = __builtin_shuffle (*x, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
150 asm volatile ("" : "+v" (a));
153 /* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%xmm16" 4 } } */
154 /* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 3 } } */
155 /* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 3 } } */
156 /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
157 /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
158 /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$170\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
159 /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$255\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
160 /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
161 /* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
162 /* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$3\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */