/* PR inline-asm/84742
   gcc/testsuite/gcc.target/i386/ssse3-palignr.c  */
1 /* { dg-do run } */
2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
5 #ifndef CHECK_H
6 #define CHECK_H "ssse3-check.h"
7 #endif
9 #ifndef TEST
10 #define TEST ssse3_test
11 #endif
13 #include CHECK_H
15 #include "ssse3-vals.h"
17 #include <tmmintrin.h>
18 #include <string.h>
#ifndef __AVX__
/* Test the 64-bit form: store palignr of the two 8-byte values at I1
   (high half) and I2 (low half) into R.  The intrinsic's shift count
   must be a compile-time immediate, so dispatch the runtime IMM to a
   matching literal.  The reference checker (compute_correct_result_64)
   expects zero for every count >= 16, so all such counts share the
   default arm's literal 16.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  /* One switch arm per legal byte count.  */
#define PALIGNR64_CASE(N) \
  case N: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, N); break;

  switch (imm)
    {
      PALIGNR64_CASE (0)
      PALIGNR64_CASE (1)
      PALIGNR64_CASE (2)
      PALIGNR64_CASE (3)
      PALIGNR64_CASE (4)
      PALIGNR64_CASE (5)
      PALIGNR64_CASE (6)
      PALIGNR64_CASE (7)
      PALIGNR64_CASE (8)
      PALIGNR64_CASE (9)
      PALIGNR64_CASE (10)
      PALIGNR64_CASE (11)
      PALIGNR64_CASE (12)
      PALIGNR64_CASE (13)
      PALIGNR64_CASE (14)
      PALIGNR64_CASE (15)
    default:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }
#undef PALIGNR64_CASE

  /* The MMX form clobbers the x87 state; reset it before returning.  */
  _mm_empty ();
}
#endif
/* Test the 128-bit form: store palignr of the two 16-byte values at I1
   (high half) and I2 (low half) into R.  As with the 64-bit form, the
   shift count must be an immediate, so dispatch the runtime IMM to a
   matching literal.  The reference checker (compute_correct_result_128)
   expects zero for every count >= 32, so all such counts share the
   default arm's literal 32.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  /* One switch arm per legal byte count.  */
#define PALIGNR128_CASE(N) \
  case N: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, N); break;

  switch (imm)
    {
      PALIGNR128_CASE (0)
      PALIGNR128_CASE (1)
      PALIGNR128_CASE (2)
      PALIGNR128_CASE (3)
      PALIGNR128_CASE (4)
      PALIGNR128_CASE (5)
      PALIGNR128_CASE (6)
      PALIGNR128_CASE (7)
      PALIGNR128_CASE (8)
      PALIGNR128_CASE (9)
      PALIGNR128_CASE (10)
      PALIGNR128_CASE (11)
      PALIGNR128_CASE (12)
      PALIGNR128_CASE (13)
      PALIGNR128_CASE (14)
      PALIGNR128_CASE (15)
      PALIGNR128_CASE (16)
      PALIGNR128_CASE (17)
      PALIGNR128_CASE (18)
      PALIGNR128_CASE (19)
      PALIGNR128_CASE (20)
      PALIGNR128_CASE (21)
      PALIGNR128_CASE (22)
      PALIGNR128_CASE (23)
      PALIGNR128_CASE (24)
      PALIGNR128_CASE (25)
      PALIGNR128_CASE (26)
      PALIGNR128_CASE (27)
      PALIGNR128_CASE (28)
      PALIGNR128_CASE (29)
      PALIGNR128_CASE (30)
      PALIGNR128_CASE (31)
    default:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }
#undef PALIGNR128_CASE
}
/* Routine to manually compute the results: scalar reference model for
   the 128-bit palignr.  Conceptually concatenate the 16 bytes at I2
   (low) and the 16 bytes at I1 (high) into a 32-byte buffer, then copy
   the 16 bytes starting at offset IMM into R; bytes shifted in from
   beyond the buffer are zero.  The explicit IMM >= 32 test also guards
   the unsigned IMM + I addition against wraparound for huge IMM.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [32];
  char *bout = (char *) r;
  int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    if (imm >= 32 || imm + i >= 32)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];
}
#ifndef __AVX__
/* Scalar reference model for a pair of 64-bit palignr operations, one
   per 8-byte half of the 16-byte output R.  For each half: concatenate
   the 8 bytes from I2 (low) and the 8 bytes from I1 (high) into a
   16-byte buffer and copy the 8 bytes starting at offset IMM; bytes
   beyond the buffer are zero.  The explicit IMM >= 16 test also guards
   the unsigned IMM + I addition against wraparound for huge IMM.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [16];
  char *bout = (char *)r;
  int i;

  /* Handle the first half */
  memcpy (&buf[0], i2, 8);
  memcpy (&buf[8], i1, 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];

  /* Handle the second half */
  memcpy (&buf[0], &i2[2], 8);
  memcpy (&buf[8], &i1[2], 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i + 8] = 0;
    else
      bout[i + 8] = buf[imm + i];
}
#endif
247 static void
248 TEST (void)
250 int i;
251 int r [4] __attribute__ ((aligned(16)));
252 int ck [4];
253 unsigned int imm;
254 int fail = 0;
256 for (i = 0; i < 256; i += 8)
257 for (imm = 0; imm < 100; imm++)
259 #ifndef __AVX__
260 /* Manually compute the result */
261 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
263 /* Run the 64-bit tests */
264 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
265 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
266 fail += chk_128 (ck, r);
267 #endif
269 /* Recompute the results for 128-bits */
270 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
272 /* Run the 128-bit tests */
273 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
274 fail += chk_128 (ck, r);
277 if (fail != 0)
278 abort ();