/* { dg-require-effective-target avx2 } */
/* { dg-options "-O2 -mavx2" } */
#include <string.h>
#include "ssse3-vals.h"
#include "avx2-check.h"
9 /* Test the 256-bit form */
11 avx2_test_palignr256 (__m256i t1
, __m256i t2
, unsigned int imm
, __m256i
* r
)
16 *r
= _mm256_alignr_epi8 (t1
, t2
, 0);
19 *r
= _mm256_alignr_epi8 (t1
, t2
, 1);
22 *r
= _mm256_alignr_epi8 (t1
, t2
, 2);
25 *r
= _mm256_alignr_epi8 (t1
, t2
, 3);
28 *r
= _mm256_alignr_epi8 (t1
, t2
, 4);
31 *r
= _mm256_alignr_epi8 (t1
, t2
, 5);
34 *r
= _mm256_alignr_epi8 (t1
, t2
, 6);
37 *r
= _mm256_alignr_epi8 (t1
, t2
, 7);
40 *r
= _mm256_alignr_epi8 (t1
, t2
, 8);
43 *r
= _mm256_alignr_epi8 (t1
, t2
, 9);
46 *r
= _mm256_alignr_epi8 (t1
, t2
, 10);
49 *r
= _mm256_alignr_epi8 (t1
, t2
, 11);
52 *r
= _mm256_alignr_epi8 (t1
, t2
, 12);
55 *r
= _mm256_alignr_epi8 (t1
, t2
, 13);
58 *r
= _mm256_alignr_epi8 (t1
, t2
, 14);
61 *r
= _mm256_alignr_epi8 (t1
, t2
, 15);
64 *r
= _mm256_alignr_epi8 (t1
, t2
, 16);
67 *r
= _mm256_alignr_epi8 (t1
, t2
, 17);
70 *r
= _mm256_alignr_epi8 (t1
, t2
, 18);
73 *r
= _mm256_alignr_epi8 (t1
, t2
, 19);
76 *r
= _mm256_alignr_epi8 (t1
, t2
, 20);
79 *r
= _mm256_alignr_epi8 (t1
, t2
, 21);
82 *r
= _mm256_alignr_epi8 (t1
, t2
, 22);
85 *r
= _mm256_alignr_epi8 (t1
, t2
, 23);
88 *r
= _mm256_alignr_epi8 (t1
, t2
, 24);
91 *r
= _mm256_alignr_epi8 (t1
, t2
, 25);
94 *r
= _mm256_alignr_epi8 (t1
, t2
, 26);
97 *r
= _mm256_alignr_epi8 (t1
, t2
, 27);
100 *r
= _mm256_alignr_epi8 (t1
, t2
, 28);
103 *r
= _mm256_alignr_epi8 (t1
, t2
, 29);
106 *r
= _mm256_alignr_epi8 (t1
, t2
, 30);
109 *r
= _mm256_alignr_epi8 (t1
, t2
, 31);
112 *r
= _mm256_alignr_epi8 (t1
, t2
, 32);
/* Routine to manually compute the results.

   I1 and I2 each point at 32 bytes of input (read as two 16-byte
   halves; the "+ 4" int stride assumes a 4-byte int).  R receives 32
   bytes of output.  IMM is the byte shift count.

   Each 16-byte lane is computed independently: the lane of I2 is placed
   in the low half of a 32-byte scratch buffer, the matching lane of I1
   in the high half, and output byte I of the lane is scratch byte
   IMM + I.  Positions that would fall outside the scratch buffer
   (IMM + I >= 32) produce 0 instead of being read.  */
static void
compute_correct_result_256 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[32];
  char *bout = (char *) r;
  unsigned int lane, i;

  /* Lane 0 fills the lower 128 bits of the result, lane 1 the higher.  */
  for (lane = 0; lane < 2; lane++)
    {
      memcpy (&buf[0], i2 + 4 * lane, 16);
      memcpy (&buf[16], i1 + 4 * lane, 16);

      for (i = 0; i < 16; i++)
        {
          /* The IMM >= 32 test comes first so that IMM + I cannot wrap
             around for very large IMM before it is compared.  */
          if (imm >= 32 || imm + i >= 32)
            bout[16 * lane + i] = 0;
          else
            bout[16 * lane + i] = buf[imm + i];
        }
    }
}
156 union256i_q s1
, s2
, d
;
158 for (i
= 0; i
< 256; i
+= 16)
159 for (imm
= 0; imm
< 100; imm
++)
161 /* Recompute the results for 256-bits */
162 compute_correct_result_256 (&vals
[i
+ 0], &vals
[i
+ 8], imm
, ck
);
164 s1
.x
= _mm256_loadu_si256 ((__m256i
*) & vals
[i
+ 0]);
165 s2
.x
= _mm256_loadu_si256 ((__m256i
*) & vals
[i
+ 8]);
167 /* Run the 256-bit tests */
168 avx2_test_palignr256 (s1
.x
, s2
.x
, imm
, &d
.x
);
170 _mm256_storeu_si256 ((__m256i
*) r
, d
.x
);
172 fail
+= checkVi (r
, ck
, 8);