2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
6 #define CHECK_H "ssse3-check.h"
10 #define TEST ssse3_test
15 #include "ssse3-vals.h"
17 #include <tmmintrin.h>
21 /* Test the 64-bit form */
/* Driver for the 64-bit form of the alignr intrinsic.

   i1, i2  sources; one __m64 is loaded from each (t1 from i1, t2 from i2).
   imm     requested byte count.
   r       destination; one __m64 is stored through it.

   Every visible statement stores _mm_alignr_pi8 (t1, t2, N) with a literal
   N from 0 to 16.

   NOTE(review): the return type, the braces and whatever selects one
   statement from imm (presumably a switch with case labels and breaks,
   with 16 serving every larger imm) are not present in this extract; as
   shown, imm is never read.  Confirm against the complete file.  */
23 ssse3_test_palignr (int *i1
, int *i2
, unsigned int imm
, int *r
)
/* Reinterpret the int pointers as __m64 and load both operands once.  */
25 __m64 t1
= *(__m64
*) i1
;
26 __m64 t2
= *(__m64
*) i2
;
/* One store per literal count; the count is always written as a constant,
   never passed as imm -- presumably because the intrinsic needs a
   compile-time constant (TODO confirm).  */
31 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 0);
34 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 1);
37 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 2);
40 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 3);
43 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 4);
46 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 5);
49 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 6);
52 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 7);
55 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 8);
58 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 9);
61 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 10);
64 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 11);
67 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 12);
70 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 13);
73 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 14);
76 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 15);
/* 16 is the largest literal used: it equals the combined size in bytes of
   the two 8-byte operands.  */
79 *(__m64
*) r
= _mm_alignr_pi8 (t1
, t2
, 16);
87 /* Test the 128-bit form */
/* Driver for the 128-bit form of the alignr intrinsic.

   i1, i2  sources; one __m128i is loaded from each (t1 from i1, t2 from i2).
   imm     requested byte count.
   r       destination; one __m128i is stored through it.

   Every visible statement stores _mm_alignr_epi8 (t1, t2, N) with a literal
   N from 0 to 32.

   NOTE(review): the return type, the braces and whatever selects one
   statement from imm (presumably a switch with case labels and breaks,
   with 32 serving every larger imm) are not present in this extract; as
   shown, imm is never read.  Confirm against the complete file.  */
89 ssse3_test_palignr128 (int *i1
, int *i2
, unsigned int imm
, int *r
)
91 /* Assumes incoming pointers are 16-byte aligned */
92 __m128i t1
= *(__m128i
*) i1
;
93 __m128i t2
= *(__m128i
*) i2
;
/* One store per literal count; the count is always written as a constant,
   never passed as imm -- presumably because the intrinsic needs a
   compile-time constant (TODO confirm).  */
98 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 0);
101 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 1);
104 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 2);
107 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 3);
110 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 4);
113 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 5);
116 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 6);
119 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 7);
122 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 8);
125 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 9);
128 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 10);
131 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 11);
134 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 12);
137 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 13);
140 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 14);
143 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 15);
146 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 16);
149 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 17);
152 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 18);
155 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 19);
158 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 20);
161 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 21);
164 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 22);
167 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 23);
170 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 24);
173 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 25);
176 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 26);
179 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 27);
182 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 28);
185 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 29);
188 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 30);
191 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 31);
/* 32 is the largest literal used: it equals the combined size in bytes of
   the two 16-byte operands.  */
194 *(__m128i
*) r
= _mm_alignr_epi8 (t1
, t2
, 32);
199 /* Routine to manually compute the results */
/* Reference model for the 128-bit alignr operation.

   The 16 bytes at I2 form the low half and the 16 bytes at I1 the high
   half of a 32-byte value.  Output byte K, for K in 0..15, is byte
   IMM + K of that value, or zero when IMM + K falls outside it; any
   IMM >= 32 therefore produces an all-zero result.

   I1, I2  sources, at least 16 readable bytes each.
   IMM     byte offset into the 32-byte concatenation.
   R       destination, at least 16 writable bytes.

   All accesses go through memcpy or char, so no alignment is required.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[32];
  char *bout = (char *) r;
  unsigned int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    {
      /* IMM is tested on its own first because IMM + I wraps around for
         very large IMM and would otherwise index far outside BUF.  */
      if (imm >= 32 || imm + i >= 32)
        bout[i] = 0;
      else
        bout[i] = buf[imm + i];
    }
}
/* Reference model for two independent 64-bit alignr operations.

   The 16 bytes at each pointer are treated as two 8-byte halves.  For
   half H (0 or 1), the 8 bytes of I2's half H form the low part and the
   8 bytes of I1's half H the high part of a 16-byte value.  Output byte
   K of that half, for K in 0..7, is byte IMM + K of the value, or zero
   when IMM + K falls outside it; any IMM >= 16 yields all zeros.

   I1, I2  sources, at least 16 readable bytes (4 ints) each.
   IMM     byte offset into each 16-byte concatenation.
   R       destination, at least 16 writable bytes.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[16];
  char *bout = (char *) r;
  unsigned int half;
  unsigned int i;

  for (half = 0; half < 2; half++)
    {
      /* Two ints make up one 8-byte half, hence the stride of 2.  */
      memcpy (&buf[0], &i2[2 * half], 8);
      memcpy (&buf[8], &i1[2 * half], 8);

      for (i = 0; i < 8; i++)
        {
          /* IMM is tested on its own first because IMM + I wraps around
             for very large IMM and would otherwise index outside BUF.  */
          if (imm >= 16 || imm + i >= 16)
            bout[8 * half + i] = 0;
          else
            bout[8 * half + i] = buf[imm + i];
        }
    }
}
235 /* Handle the second half */
236 memcpy (&buf
[0], &i2
[2], 8);
237 memcpy (&buf
[8], &i1
[2], 8);
239 for (i
= 0; i
< 8; i
++)
240 if (imm
>= 16 || imm
+ i
>= 16)
243 bout
[i
+ 8] = buf
[imm
+ i
];
/* Body of the test driver.
   NOTE(review): the function header and the declarations of i, imm, ck,
   fail and vals are not present in this extract; the header is presumably
   the TEST macro defined near the top of the file -- confirm.  */
/* Result buffer; aligned to 16 bytes, matching the alignment the 128-bit
   driver states it assumes for its pointers.  */
251 int r
[4] __attribute__ ((aligned(16)));
/* Step through vals 8 ints at a time: ints i..i+3 are passed as the first
   operand and ints i+4..i+7 as the second.  */
256 for (i
= 0; i
< 256; i
+= 8)
/* imm runs well past the largest literal either driver uses (16 and 32),
   so oversized counts are exercised as well.  */
257 for (imm
= 0; imm
< 100; imm
++)
260 /* Manually compute the result */
261 compute_correct_result_64 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, ck
);
263 /* Run the 64-bit tests */
/* Two calls fill r: the first writes r[0..1] from the low 8 bytes of each
   operand, the second writes r[2..3] from the high 8 bytes, mirroring the
   first-half/second-half split in compute_correct_result_64.  */
264 ssse3_test_palignr (&vals
[i
+ 0], &vals
[i
+ 4], imm
, &r
[0]);
265 ssse3_test_palignr (&vals
[i
+ 2], &vals
[i
+ 6], imm
, &r
[2]);
/* chk_128 is defined elsewhere; presumably it returns nonzero when ck and
   r differ, so fail counts mismatches -- TODO confirm.  */
266 fail
+= chk_128 (ck
, r
);
269 /* Recompute the results for 128-bits */
270 compute_correct_result_128 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, ck
);
272 /* Run the 128-bit tests */
273 ssse3_test_palignr128 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, r
);
274 fail
+= chk_128 (ck
, r
);