Update concepts branch to revision 131834
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / ssse3-palignr.c
blob386cddb2ee6cd2d21f5abe60077654ae182d8e2b
1 /* { dg-do run } */
2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
5 #include "ssse3-check.h"
6 #include "ssse3-vals.h"
8 #include <tmmintrin.h>
9 #include <string.h>
/* Test the 64-bit form.

   Loads 8 bytes each from I1 and I2, applies _mm_alignr_pi8 to them
   with the byte count IMM, and stores the 8-byte result at R.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  /* The last operand of the intrinsic has to be a compile-time
     constant, so a run-time IMM is dispatched to one call site per
     value.  */
#define PALIGNR64_CASE(N)				\
  case N:						\
    *(__m64 *) r = _mm_alignr_pi8 (t1, t2, N);		\
    break

  switch (imm)
    {
    PALIGNR64_CASE (0);
    PALIGNR64_CASE (1);
    PALIGNR64_CASE (2);
    PALIGNR64_CASE (3);
    PALIGNR64_CASE (4);
    PALIGNR64_CASE (5);
    PALIGNR64_CASE (6);
    PALIGNR64_CASE (7);
    PALIGNR64_CASE (8);
    PALIGNR64_CASE (9);
    PALIGNR64_CASE (10);
    PALIGNR64_CASE (11);
    PALIGNR64_CASE (12);
    PALIGNR64_CASE (13);
    PALIGNR64_CASE (14);
    PALIGNR64_CASE (15);
    default:
      /* Every IMM above 15 is exercised with the immediate 16.  */
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }

#undef PALIGNR64_CASE

  /* Clear the MMX state now that the __m64 work is done.  */
  _mm_empty();
}
/* Test the 128-bit form.

   Loads 16 bytes each from I1 and I2, applies _mm_alignr_epi8 to them
   with the byte count IMM, and stores the 16-byte result at R.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  /* The last operand of the intrinsic has to be a compile-time
     constant, so a run-time IMM is dispatched to one call site per
     value.  */
#define PALIGNR128_CASE(N)				\
  case N:						\
    *(__m128i *) r = _mm_alignr_epi8 (t1, t2, N);	\
    break

  switch (imm)
    {
    PALIGNR128_CASE (0);
    PALIGNR128_CASE (1);
    PALIGNR128_CASE (2);
    PALIGNR128_CASE (3);
    PALIGNR128_CASE (4);
    PALIGNR128_CASE (5);
    PALIGNR128_CASE (6);
    PALIGNR128_CASE (7);
    PALIGNR128_CASE (8);
    PALIGNR128_CASE (9);
    PALIGNR128_CASE (10);
    PALIGNR128_CASE (11);
    PALIGNR128_CASE (12);
    PALIGNR128_CASE (13);
    PALIGNR128_CASE (14);
    PALIGNR128_CASE (15);
    PALIGNR128_CASE (16);
    PALIGNR128_CASE (17);
    PALIGNR128_CASE (18);
    PALIGNR128_CASE (19);
    PALIGNR128_CASE (20);
    PALIGNR128_CASE (21);
    PALIGNR128_CASE (22);
    PALIGNR128_CASE (23);
    PALIGNR128_CASE (24);
    PALIGNR128_CASE (25);
    PALIGNR128_CASE (26);
    PALIGNR128_CASE (27);
    PALIGNR128_CASE (28);
    PALIGNR128_CASE (29);
    PALIGNR128_CASE (30);
    PALIGNR128_CASE (31);
    default:
      /* Every IMM above 31 is exercised with the immediate 32.  */
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }

#undef PALIGNR128_CASE
}
/* Routine to manually compute the results.

   Builds a 32-byte buffer holding the 16 bytes at I2 followed by the
   16 bytes at I1, then writes to R the 16 bytes found at byte offset
   IMM in that buffer.  Output bytes whose source position lies past
   the end of the buffer are written as zero.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [32];
  char *bout = (char *) r;
  int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    {
      /* IMM is tested on its own first: IMM + I is unsigned arithmetic
         and could wrap around to a small index for a very large IMM.  */
      if (imm >= 32 || imm + i >= 32)
        bout[i] = 0;
      else
        bout[i] = buf[imm + i];
    }
}
/* Manually compute the expected result of two independent 64-bit
   operations.

   For each 8-byte half of the 16 bytes at I1 and I2, builds a 16-byte
   buffer holding that half of I2 followed by the same half of I1, then
   writes to the matching half of R the 8 bytes found at byte offset
   IMM in that buffer.  Output bytes whose source position lies past
   the end of the buffer are written as zero.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [16];
  char *bout = (char *)r;
  int half;
  int i;

  /* Half 0 is the first 8 bytes (two ints), half 1 the second 8.  */
  for (half = 0; half < 2; half++)
    {
      memcpy (&buf[0], &i2[2 * half], 8);
      memcpy (&buf[8], &i1[2 * half], 8);

      for (i = 0; i < 8; i++)
        {
          /* IMM is tested on its own first: IMM + I is unsigned
             arithmetic and could wrap around to a small index for a
             very large IMM.  */
          if (imm >= 16 || imm + i >= 16)
            bout[8 * half + i] = 0;
          else
            bout[8 * half + i] = buf[imm + i];
        }
    }
}
234 static void
235 ssse3_test (void)
237 int i;
238 int r [4] __attribute__ ((aligned(16)));
239 int ck [4];
240 unsigned int imm;
241 int fail = 0;
243 for (i = 0; i < 256; i += 8)
244 for (imm = 0; imm < 100; imm++)
246 /* Manually compute the result */
247 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
249 /* Run the 64-bit tests */
250 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
251 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
252 fail += chk_128 (ck, r);
254 /* Recompute the results for 128-bits */
255 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
257 /* Run the 128-bit tests */
258 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
259 fail += chk_128 (ck, r);
262 if (fail != 0)
263 abort ();