2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -mssse3" } */
5 #include "ssse3-check.h"
6 #include "ssse3-vals.h"
/* Test the 64-bit form.

   Loads one 64-bit value from I1 (high half of the concatenation) and one
   from I2 (low half), shifts the 16-byte concatenation right by IMM bytes
   with _mm_alignr_pi8 and stores the low 8 bytes of the result to R.

   The shift count of the intrinsic must be a compile-time constant, so the
   run-time IMM is dispatched through a switch with one case per count.
   Every IMM of 16 or more is funnelled into the shift-by-16 case.

   I1, I2 and R are accessed through __m64 pointers.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

/* One switch arm per immediate; N has to be an integer literal.  */
#define PALIGNR64_CASE(n) \
  case n: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, n); break

  switch (imm)
    {
    PALIGNR64_CASE (0);  PALIGNR64_CASE (1);
    PALIGNR64_CASE (2);  PALIGNR64_CASE (3);
    PALIGNR64_CASE (4);  PALIGNR64_CASE (5);
    PALIGNR64_CASE (6);  PALIGNR64_CASE (7);
    PALIGNR64_CASE (8);  PALIGNR64_CASE (9);
    PALIGNR64_CASE (10); PALIGNR64_CASE (11);
    PALIGNR64_CASE (12); PALIGNR64_CASE (13);
    PALIGNR64_CASE (14); PALIGNR64_CASE (15);
    default:
      /* Anything past the 16-byte concatenation.  */
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }

#undef PALIGNR64_CASE

  /* Leave the MMX state clean for any x87 code that runs afterwards.  */
  _mm_empty ();
}
/* Test the 128-bit form.

   Loads one 128-bit value from I1 (high half of the concatenation) and one
   from I2 (low half), shifts the 32-byte concatenation right by IMM bytes
   with _mm_alignr_epi8 and stores the low 16 bytes of the result to R.

   The shift count of the intrinsic must be a compile-time constant, so the
   run-time IMM is dispatched through a switch with one case per count.
   Every IMM of 32 or more is funnelled into the shift-by-32 case.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

/* One switch arm per immediate; N has to be an integer literal.  */
#define PALIGNR128_CASE(n) \
  case n: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, n); break

  switch (imm)
    {
    PALIGNR128_CASE (0);  PALIGNR128_CASE (1);
    PALIGNR128_CASE (2);  PALIGNR128_CASE (3);
    PALIGNR128_CASE (4);  PALIGNR128_CASE (5);
    PALIGNR128_CASE (6);  PALIGNR128_CASE (7);
    PALIGNR128_CASE (8);  PALIGNR128_CASE (9);
    PALIGNR128_CASE (10); PALIGNR128_CASE (11);
    PALIGNR128_CASE (12); PALIGNR128_CASE (13);
    PALIGNR128_CASE (14); PALIGNR128_CASE (15);
    PALIGNR128_CASE (16); PALIGNR128_CASE (17);
    PALIGNR128_CASE (18); PALIGNR128_CASE (19);
    PALIGNR128_CASE (20); PALIGNR128_CASE (21);
    PALIGNR128_CASE (22); PALIGNR128_CASE (23);
    PALIGNR128_CASE (24); PALIGNR128_CASE (25);
    PALIGNR128_CASE (26); PALIGNR128_CASE (27);
    PALIGNR128_CASE (28); PALIGNR128_CASE (29);
    PALIGNR128_CASE (30); PALIGNR128_CASE (31);
    default:
      /* Anything past the 32-byte concatenation.  */
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }

#undef PALIGNR128_CASE
}
/* Routine to manually compute the results.

   Builds the 32-byte concatenation with the 16 bytes at I2 in the low half
   and the 16 bytes at I1 in the high half, then writes to R the 16 bytes
   starting IMM bytes into it.  Bytes that would come from beyond the end
   of the concatenation are written as zero, so any IMM of 32 or more
   yields an all-zero result.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[32];
  char *bout = (char *) r;
  unsigned int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    {
      /* Test IMM on its own first so a huge IMM cannot wrap IMM + I back
	 into range.  */
      if (imm >= 32 || imm + i >= 32)
	bout[i] = 0;
      else
	bout[i] = buf[imm + i];
    }
}
/* Manually compute the expected result of two independent 64-bit
   operations and store them back to back in the 16 bytes at R.

   For each half (ints 0-1, then ints 2-3 of I1 and I2) a 16-byte
   concatenation is built with the 8 bytes from I2 low and the 8 bytes from
   I1 high; the 8 bytes starting IMM bytes into it become that half of the
   result.  Bytes that would come from beyond the end of the concatenation
   are written as zero, so any IMM of 16 or more yields all zeros.

   Like the indexing it replaces (&i1[2] as the start of the second 8-byte
   half), this relies on int being 4 bytes wide.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[16];
  char *bout = (char *) r;
  unsigned int half;
  unsigned int i;

  for (half = 0; half < 2; half++)
    {
      /* The second half starts two ints into each input and eight bytes
	 into the output.  */
      memcpy (&buf[0], &i2[2 * half], 8);
      memcpy (&buf[8], &i1[2 * half], 8);

      for (i = 0; i < 8; i++)
	{
	  /* Test IMM on its own first so a huge IMM cannot wrap IMM + I
	     back into range.  */
	  if (imm >= 16 || imm + i >= 16)
	    bout[8 * half + i] = 0;
	  else
	    bout[8 * half + i] = buf[imm + i];
	}
    }
}
/* Test driver body (fragment).  The enclosing function header and the
   declarations of i, imm, ck and fail are not visible in this listing;
   vals and chk_128 are defined elsewhere (presumably in the included
   ssse3-vals.h / ssse3-check.h -- TODO confirm).

   r receives the intrinsic results.  It is 16-byte aligned because the
   128-bit test stores to it through a __m128i pointer.  */
238 int r
[4] __attribute__ ((aligned(16)));
/* Step through vals eight ints at a time: ints i..i+3 are passed as the
   first operand and ints i+4..i+7 as the second.  */
243 for (i
= 0; i
< 256; i
+= 8)
/* Try every shift count from 0 to 99.  This goes well past 16 and 32, the
   limits above which the reference routines produce all zeros.  */
244 for (imm
= 0; imm
< 100; imm
++)
246 /* Manually compute the expected result of both 64-bit halves into ck */
247 compute_correct_result_64 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, ck
);
249 /* Run the 64-bit tests: the first call handles the lower two ints of
   each operand and fills r[0..1], the second handles the upper two ints
   and fills r[2..3], matching the layout the reference routine writes */
250 ssse3_test_palignr (&vals
[i
+ 0], &vals
[i
+ 4], imm
, &r
[0]);
251 ssse3_test_palignr (&vals
[i
+ 2], &vals
[i
+ 6], imm
, &r
[2]);
/* Accumulate the comparison of expected (ck) against actual (r).
   Presumably chk_128 returns non-zero on a mismatch -- TODO confirm.  */
252 fail
+= chk_128 (ck
, r
);
254 /* Recompute the expected result in ck for the 128-bit operation */
255 compute_correct_result_128 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, ck
);
257 /* Run the 128-bit test on the same operands, overwriting all of r */
258 ssse3_test_palignr128 (&vals
[i
+ 0], &vals
[i
+ 4], imm
, r
);
/* Accumulate the 128-bit comparison the same way.  */
259 fail
+= chk_128 (ck
, r
);