/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -mssse3" } */
#include <stdlib.h>
#include <string.h>
#include <tmmintrin.h>

#include "../../gcc.dg/i386-cpuid.h"
#include "ssse3-vals.h"
static void ssse3_test (void);
14 unsigned long cpu_facilities
;
16 cpu_facilities
= i386_cpuid_ecx ();
18 /* Run SSSE3 test only if host has SSSE3 support. */
19 if ((cpu_facilities
& bit_SSSE3
))
25 /* Test the 64-bit form */
27 ssse3_test_palignr (int *i1
, int *i2
, unsigned int imm
, int *r
)
29 __m64 t1
= *(__m64
*) i1
;
30 __m64 t2
= *(__m64
*) i2
;
35 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 0);
38 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 1);
41 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 2);
44 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 3);
47 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 4);
50 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 5);
53 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 6);
56 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 7);
59 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 8);
62 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 9);
65 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 10);
68 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 11);
71 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 12);
74 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 13);
77 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 14);
80 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 15);
83 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 16);
90 /* Test the 128-bit form */
92 ssse3_test_palignr128 (int *i1
, int *i2
, unsigned int imm
, int *r
)
94 /* Assumes incoming pointers are 16-byte aligned */
95 __m128i t1
= *(__m128i
*) i1
;
96 __m128i t2
= *(__m128i
*) i2
;
101 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 0);
104 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 1);
107 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 2);
110 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 3);
113 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 4);
116 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 5);
119 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 6);
122 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 7);
125 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 8);
128 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 9);
131 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 10);
134 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 11);
137 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 12);
140 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 13);
143 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 14);
146 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 15);
149 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 16);
152 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 17);
155 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 18);
158 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 19);
161 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 20);
164 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 21);
167 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 22);
170 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 23);
173 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 24);
176 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 25);
179 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 26);
182 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 27);
185 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 28);
188 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 29);
191 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 30);
194 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 31);
197 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 32);
202 /* Routine to manually compute the results */
204 compute_correct_result_128 (int *i1
, int *i2
, unsigned int imm
, int *r
)
207 char *bout
= (char *) r
;
210 memcpy (&buf
[0], i2
, 16);
211 memcpy (&buf
[16], i1
, 16);
213 for (i
= 0; i
< 16; i
++)
214 if (imm
>= 32 || imm
+ i
>= 32)
217 bout
[i
] = buf
[imm
+ i
];
221 compute_correct_result_64 (int *i1
, int *i2
, unsigned int imm
, int *r
)
224 char *bout
= (char *)r
;
227 /* Handle the first half */
228 memcpy (&buf
[0], i2
, 8);
229 memcpy (&buf
[8], i1
, 8);
231 for (i
= 0; i
< 8; i
++)
232 if (imm
>= 16 || imm
+ i
>= 16)
235 bout
[i
] = buf
[imm
+ i
];
237 /* Handle the second half */
238 memcpy (&buf
[0], &i2
[2], 8);
239 memcpy (&buf
[8], &i1
[2], 8);
241 for (i
= 0; i
< 8; i
++)
242 if (imm
>= 16 || imm
+ i
>= 16)
245 bout
[i
+ 8] = buf
[imm
+ i
];
252 int r
[4] __attribute__ ((aligned(16)));
257 for (i
= 0; i
< 256; i
+= 8)
258 for (imm
= 0; imm
< 100; imm
++)
260 /* Manually compute the result */
261 compute_correct_result_64 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, ck
);
263 /* Run the 64-bit tests */
264 ssse3_test_palignr (&vals
[i
+ 0], &vals
[i
+ 4], imm
, &r
[0]);
265 ssse3_test_palignr (&vals
[i
+ 2], &vals
[i
+ 6], imm
, &r
[2]);
266 fail
+= chk_128 (ck
, r
);
268 /* Recompute the results for 128-bits */
269 compute_correct_result_128 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, ck
);
271 /* Run the 128-bit tests */
272 ssse3_test_palignr128 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, r
);
273 fail
+= chk_128 (ck
, r
);