2006-12-15 H.J. Lu <hongjiu.lu@intel.com>
[official-gcc.git] / gcc / testsuite / gcc.target / i386 / ssse3-palignr.c
blob683d24fdf496db859e467d2666dc40dac5f84256
1 /* { dg-do run { target i?86-*-* x86_64-*-* } } */
2 /* { dg-options "-O2 -mssse3" } */
3 #include <tmmintrin.h>
4 #include <string.h>
5 #include <stdlib.h>
6 #include "../../gcc.dg/i386-cpuid.h"
7 #include "ssse3-vals.h"
9 static void ssse3_test (void);
11 int
12 main ()
14 unsigned long cpu_facilities;
16 cpu_facilities = i386_cpuid_ecx ();
18 /* Run SSSE3 test only if host has SSSE3 support. */
19 if ((cpu_facilities & bit_SSSE3))
20 ssse3_test ();
22 exit (0);
25 /* Test the 64-bit form */
26 static void
27 ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
29 __m64 t1 = *(__m64 *) i1;
30 __m64 t2 = *(__m64 *) i2;
32 switch (imm)
34 case 0:
35 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
36 break;
37 case 1:
38 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
39 break;
40 case 2:
41 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
42 break;
43 case 3:
44 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
45 break;
46 case 4:
47 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
48 break;
49 case 5:
50 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
51 break;
52 case 6:
53 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
54 break;
55 case 7:
56 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
57 break;
58 case 8:
59 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
60 break;
61 case 9:
62 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
63 break;
64 case 10:
65 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
66 break;
67 case 11:
68 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
69 break;
70 case 12:
71 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
72 break;
73 case 13:
74 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
75 break;
76 case 14:
77 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
78 break;
79 case 15:
80 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
81 break;
82 default:
83 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
84 break;
87 _mm_empty();
90 /* Test the 128-bit form */
91 static void
92 ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
94 /* Assumes incoming pointers are 16-byte aligned */
95 __m128i t1 = *(__m128i *) i1;
96 __m128i t2 = *(__m128i *) i2;
98 switch (imm)
100 case 0:
101 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
102 break;
103 case 1:
104 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
105 break;
106 case 2:
107 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
108 break;
109 case 3:
110 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
111 break;
112 case 4:
113 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
114 break;
115 case 5:
116 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
117 break;
118 case 6:
119 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
120 break;
121 case 7:
122 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
123 break;
124 case 8:
125 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
126 break;
127 case 9:
128 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
129 break;
130 case 10:
131 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
132 break;
133 case 11:
134 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
135 break;
136 case 12:
137 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
138 break;
139 case 13:
140 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
141 break;
142 case 14:
143 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
144 break;
145 case 15:
146 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
147 break;
148 case 16:
149 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
150 break;
151 case 17:
152 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
153 break;
154 case 18:
155 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
156 break;
157 case 19:
158 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
159 break;
160 case 20:
161 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
162 break;
163 case 21:
164 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
165 break;
166 case 22:
167 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
168 break;
169 case 23:
170 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
171 break;
172 case 24:
173 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
174 break;
175 case 25:
176 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
177 break;
178 case 26:
179 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
180 break;
181 case 27:
182 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
183 break;
184 case 28:
185 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
186 break;
187 case 29:
188 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
189 break;
190 case 30:
191 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
192 break;
193 case 31:
194 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
195 break;
196 default:
197 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
198 break;
/* Routine to manually compute the results.

   Builds a 32-byte buffer whose low 16 bytes are copied from I2 and
   whose high 16 bytes are copied from I1, then writes to R the 16 bytes
   starting at byte offset IMM.  Any output byte whose source index
   falls at or beyond the end of the buffer is written as 0.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [32];
  char *bout = (char *) r;
  int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    /* The separate IMM >= 32 test keeps a huge IMM from wrapping
       around in the unsigned sum IMM + I and indexing into BUF.  */
    if (imm >= 32 || imm + i >= 32)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];
}
/* Reference result for a pair of 64-bit operations, 16 output bytes in
   total.

   For each 8-byte half, builds a 16-byte buffer whose low 8 bytes come
   from I2 and whose high 8 bytes come from I1, then writes to R the 8
   bytes starting at byte offset IMM.  The first half uses the first two
   ints of I1/I2 and fills bytes 0-7 of R; the second half uses the ints
   at index 2-3 and fills bytes 8-15.  Any output byte whose source
   index falls at or beyond the end of the buffer is written as 0.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [16];
  char *bout = (char *)r;
  int i;

  /* Handle the first half */
  memcpy (&buf[0], i2, 8);
  memcpy (&buf[8], i1, 8);

  for (i = 0; i < 8; i++)
    /* The separate IMM >= 16 test keeps a huge IMM from wrapping
       around in the unsigned sum IMM + I and indexing into BUF.  */
    if (imm >= 16 || imm + i >= 16)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];

  /* Handle the second half */
  memcpy (&buf[0], &i2[2], 8);
  memcpy (&buf[8], &i1[2], 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i + 8] = 0;
    else
      bout[i + 8] = buf[imm + i];
}
248 static void
249 ssse3_test (void)
251 int i;
252 int r [4] __attribute__ ((aligned(16)));
253 int ck [4];
254 unsigned int imm;
255 int fail = 0;
257 for (i = 0; i < 256; i += 8)
258 for (imm = 0; imm < 100; imm++)
260 /* Manually compute the result */
261 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
263 /* Run the 64-bit tests */
264 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
265 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
266 fail += chk_128 (ck, r);
268 /* Recompute the results for 128-bits */
269 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
271 /* Run the 128-bit tests */
272 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
273 fail += chk_128 (ck, r);
276 if (fail != 0)
277 abort ();