/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
33 /* Doesn't require avx512vl, used in avx512dqintrin.h */
34 static __inline __m128i
__attribute__((__always_inline__
, __nodebug__
, __target__("avx512f")))
35 _mm_setzero_di(void) {
36 return (__m128i
)(__v2di
){ 0LL, 0LL};
41 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
42 _mm_cmpeq_epi32_mask(__m128i __a
, __m128i __b
) {
43 return (__mmask8
)__builtin_ia32_pcmpeqd128_mask((__v4si
)__a
, (__v4si
)__b
,
47 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
48 _mm_mask_cmpeq_epi32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
49 return (__mmask8
)__builtin_ia32_pcmpeqd128_mask((__v4si
)__a
, (__v4si
)__b
,
53 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
54 _mm_cmpeq_epu32_mask(__m128i __a
, __m128i __b
) {
55 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 0,
59 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
60 _mm_mask_cmpeq_epu32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
61 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 0,
65 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
66 _mm256_cmpeq_epi32_mask(__m256i __a
, __m256i __b
) {
67 return (__mmask8
)__builtin_ia32_pcmpeqd256_mask((__v8si
)__a
, (__v8si
)__b
,
71 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
72 _mm256_mask_cmpeq_epi32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
73 return (__mmask8
)__builtin_ia32_pcmpeqd256_mask((__v8si
)__a
, (__v8si
)__b
,
77 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
78 _mm256_cmpeq_epu32_mask(__m256i __a
, __m256i __b
) {
79 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 0,
83 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
84 _mm256_mask_cmpeq_epu32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
85 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 0,
89 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
90 _mm_cmpeq_epi64_mask(__m128i __a
, __m128i __b
) {
91 return (__mmask8
)__builtin_ia32_pcmpeqq128_mask((__v2di
)__a
, (__v2di
)__b
,
95 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
96 _mm_mask_cmpeq_epi64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
97 return (__mmask8
)__builtin_ia32_pcmpeqq128_mask((__v2di
)__a
, (__v2di
)__b
,
101 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
102 _mm_cmpeq_epu64_mask(__m128i __a
, __m128i __b
) {
103 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 0,
107 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
108 _mm_mask_cmpeq_epu64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
109 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 0,
113 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
114 _mm256_cmpeq_epi64_mask(__m256i __a
, __m256i __b
) {
115 return (__mmask8
)__builtin_ia32_pcmpeqq256_mask((__v4di
)__a
, (__v4di
)__b
,
119 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
120 _mm256_mask_cmpeq_epi64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
121 return (__mmask8
)__builtin_ia32_pcmpeqq256_mask((__v4di
)__a
, (__v4di
)__b
,
125 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
126 _mm256_cmpeq_epu64_mask(__m256i __a
, __m256i __b
) {
127 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 0,
131 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
132 _mm256_mask_cmpeq_epu64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
133 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 0,
138 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
139 _mm_cmpge_epi32_mask(__m128i __a
, __m128i __b
) {
140 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 5,
144 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
145 _mm_mask_cmpge_epi32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
146 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 5,
150 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
151 _mm_cmpge_epu32_mask(__m128i __a
, __m128i __b
) {
152 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 5,
156 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
157 _mm_mask_cmpge_epu32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
158 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 5,
162 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
163 _mm256_cmpge_epi32_mask(__m256i __a
, __m256i __b
) {
164 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 5,
168 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
169 _mm256_mask_cmpge_epi32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
170 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 5,
174 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
175 _mm256_cmpge_epu32_mask(__m256i __a
, __m256i __b
) {
176 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 5,
180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
181 _mm256_mask_cmpge_epu32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
182 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 5,
186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
187 _mm_cmpge_epi64_mask(__m128i __a
, __m128i __b
) {
188 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 5,
192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
193 _mm_mask_cmpge_epi64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
194 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 5,
198 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
199 _mm_cmpge_epu64_mask(__m128i __a
, __m128i __b
) {
200 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 5,
204 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
205 _mm_mask_cmpge_epu64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
206 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 5,
210 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
211 _mm256_cmpge_epi64_mask(__m256i __a
, __m256i __b
) {
212 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 5,
216 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
217 _mm256_mask_cmpge_epi64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
218 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 5,
222 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
223 _mm256_cmpge_epu64_mask(__m256i __a
, __m256i __b
) {
224 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 5,
228 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
229 _mm256_mask_cmpge_epu64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
230 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 5,
234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
235 _mm_cmpgt_epi32_mask(__m128i __a
, __m128i __b
) {
236 return (__mmask8
)__builtin_ia32_pcmpgtd128_mask((__v4si
)__a
, (__v4si
)__b
,
240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
241 _mm_mask_cmpgt_epi32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
242 return (__mmask8
)__builtin_ia32_pcmpgtd128_mask((__v4si
)__a
, (__v4si
)__b
,
246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
247 _mm_cmpgt_epu32_mask(__m128i __a
, __m128i __b
) {
248 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 6,
252 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
253 _mm_mask_cmpgt_epu32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
254 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 6,
258 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
259 _mm256_cmpgt_epi32_mask(__m256i __a
, __m256i __b
) {
260 return (__mmask8
)__builtin_ia32_pcmpgtd256_mask((__v8si
)__a
, (__v8si
)__b
,
264 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
265 _mm256_mask_cmpgt_epi32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
266 return (__mmask8
)__builtin_ia32_pcmpgtd256_mask((__v8si
)__a
, (__v8si
)__b
,
270 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
271 _mm256_cmpgt_epu32_mask(__m256i __a
, __m256i __b
) {
272 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 6,
276 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
277 _mm256_mask_cmpgt_epu32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
278 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 6,
282 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
283 _mm_cmpgt_epi64_mask(__m128i __a
, __m128i __b
) {
284 return (__mmask8
)__builtin_ia32_pcmpgtq128_mask((__v2di
)__a
, (__v2di
)__b
,
288 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
289 _mm_mask_cmpgt_epi64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
290 return (__mmask8
)__builtin_ia32_pcmpgtq128_mask((__v2di
)__a
, (__v2di
)__b
,
294 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
295 _mm_cmpgt_epu64_mask(__m128i __a
, __m128i __b
) {
296 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 6,
300 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
301 _mm_mask_cmpgt_epu64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
302 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 6,
306 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
307 _mm256_cmpgt_epi64_mask(__m256i __a
, __m256i __b
) {
308 return (__mmask8
)__builtin_ia32_pcmpgtq256_mask((__v4di
)__a
, (__v4di
)__b
,
312 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
313 _mm256_mask_cmpgt_epi64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
314 return (__mmask8
)__builtin_ia32_pcmpgtq256_mask((__v4di
)__a
, (__v4di
)__b
,
318 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
319 _mm256_cmpgt_epu64_mask(__m256i __a
, __m256i __b
) {
320 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 6,
324 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
325 _mm256_mask_cmpgt_epu64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
326 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 6,
330 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
331 _mm_cmple_epi32_mask(__m128i __a
, __m128i __b
) {
332 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 2,
336 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
337 _mm_mask_cmple_epi32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
338 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 2,
342 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
343 _mm_cmple_epu32_mask(__m128i __a
, __m128i __b
) {
344 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 2,
348 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
349 _mm_mask_cmple_epu32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
350 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 2,
354 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
355 _mm256_cmple_epi32_mask(__m256i __a
, __m256i __b
) {
356 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 2,
360 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
361 _mm256_mask_cmple_epi32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
362 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 2,
366 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
367 _mm256_cmple_epu32_mask(__m256i __a
, __m256i __b
) {
368 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 2,
372 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
373 _mm256_mask_cmple_epu32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
374 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 2,
378 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
379 _mm_cmple_epi64_mask(__m128i __a
, __m128i __b
) {
380 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 2,
384 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
385 _mm_mask_cmple_epi64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
386 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 2,
390 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
391 _mm_cmple_epu64_mask(__m128i __a
, __m128i __b
) {
392 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 2,
396 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
397 _mm_mask_cmple_epu64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
398 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 2,
402 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
403 _mm256_cmple_epi64_mask(__m256i __a
, __m256i __b
) {
404 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 2,
408 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
409 _mm256_mask_cmple_epi64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
410 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 2,
414 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
415 _mm256_cmple_epu64_mask(__m256i __a
, __m256i __b
) {
416 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 2,
420 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
421 _mm256_mask_cmple_epu64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
422 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 2,
426 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
427 _mm_cmplt_epi32_mask(__m128i __a
, __m128i __b
) {
428 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 1,
432 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
433 _mm_mask_cmplt_epi32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
434 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 1,
438 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
439 _mm_cmplt_epu32_mask(__m128i __a
, __m128i __b
) {
440 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 1,
444 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
445 _mm_mask_cmplt_epu32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
446 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 1,
450 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
451 _mm256_cmplt_epi32_mask(__m256i __a
, __m256i __b
) {
452 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 1,
456 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
457 _mm256_mask_cmplt_epi32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
458 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 1,
462 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
463 _mm256_cmplt_epu32_mask(__m256i __a
, __m256i __b
) {
464 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 1,
468 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
469 _mm256_mask_cmplt_epu32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
470 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 1,
474 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
475 _mm_cmplt_epi64_mask(__m128i __a
, __m128i __b
) {
476 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 1,
480 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
481 _mm_mask_cmplt_epi64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
482 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 1,
486 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
487 _mm_cmplt_epu64_mask(__m128i __a
, __m128i __b
) {
488 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 1,
492 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
493 _mm_mask_cmplt_epu64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
494 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 1,
498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
499 _mm256_cmplt_epi64_mask(__m256i __a
, __m256i __b
) {
500 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 1,
504 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
505 _mm256_mask_cmplt_epi64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
506 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 1,
510 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
511 _mm256_cmplt_epu64_mask(__m256i __a
, __m256i __b
) {
512 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 1,
516 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
517 _mm256_mask_cmplt_epu64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
518 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 1,
522 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
523 _mm_cmpneq_epi32_mask(__m128i __a
, __m128i __b
) {
524 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 4,
528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
529 _mm_mask_cmpneq_epi32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
530 return (__mmask8
)__builtin_ia32_cmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 4,
534 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
535 _mm_cmpneq_epu32_mask(__m128i __a
, __m128i __b
) {
536 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 4,
540 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
541 _mm_mask_cmpneq_epu32_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
542 return (__mmask8
)__builtin_ia32_ucmpd128_mask((__v4si
)__a
, (__v4si
)__b
, 4,
546 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
547 _mm256_cmpneq_epi32_mask(__m256i __a
, __m256i __b
) {
548 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 4,
552 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
553 _mm256_mask_cmpneq_epi32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
554 return (__mmask8
)__builtin_ia32_cmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 4,
558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
559 _mm256_cmpneq_epu32_mask(__m256i __a
, __m256i __b
) {
560 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 4,
564 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
565 _mm256_mask_cmpneq_epu32_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
566 return (__mmask8
)__builtin_ia32_ucmpd256_mask((__v8si
)__a
, (__v8si
)__b
, 4,
570 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
571 _mm_cmpneq_epi64_mask(__m128i __a
, __m128i __b
) {
572 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 4,
576 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
577 _mm_mask_cmpneq_epi64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
578 return (__mmask8
)__builtin_ia32_cmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 4,
582 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
583 _mm_cmpneq_epu64_mask(__m128i __a
, __m128i __b
) {
584 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 4,
588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
589 _mm_mask_cmpneq_epu64_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
590 return (__mmask8
)__builtin_ia32_ucmpq128_mask((__v2di
)__a
, (__v2di
)__b
, 4,
594 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
595 _mm256_cmpneq_epi64_mask(__m256i __a
, __m256i __b
) {
596 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 4,
600 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
601 _mm256_mask_cmpneq_epi64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
602 return (__mmask8
)__builtin_ia32_cmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 4,
606 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
607 _mm256_cmpneq_epu64_mask(__m256i __a
, __m256i __b
) {
608 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 4,
612 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
613 _mm256_mask_cmpneq_epu64_mask(__mmask8 __u
, __m256i __a
, __m256i __b
) {
614 return (__mmask8
)__builtin_ia32_ucmpq256_mask((__v4di
)__a
, (__v4di
)__b
, 4,
618 static __inline__ __m256i __DEFAULT_FN_ATTRS
619 _mm256_mask_add_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
622 return (__m256i
) __builtin_ia32_paddd256_mask ((__v8si
) __A
,
628 static __inline__ __m256i __DEFAULT_FN_ATTRS
629 _mm256_maskz_add_epi32 (__mmask8 __U
, __m256i __A
, __m256i __B
)
631 return (__m256i
) __builtin_ia32_paddd256_mask ((__v8si
) __A
,
634 _mm256_setzero_si256 (),
638 static __inline__ __m256i __DEFAULT_FN_ATTRS
639 _mm256_mask_add_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
642 return (__m256i
) __builtin_ia32_paddq256_mask ((__v4di
) __A
,
648 static __inline__ __m256i __DEFAULT_FN_ATTRS
649 _mm256_maskz_add_epi64 (__mmask8 __U
, __m256i __A
, __m256i __B
)
651 return (__m256i
) __builtin_ia32_paddq256_mask ((__v4di
) __A
,
654 _mm256_setzero_si256 (),
658 static __inline__ __m256i __DEFAULT_FN_ATTRS
659 _mm256_mask_sub_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
662 return (__m256i
) __builtin_ia32_psubd256_mask ((__v8si
) __A
,
668 static __inline__ __m256i __DEFAULT_FN_ATTRS
669 _mm256_maskz_sub_epi32 (__mmask8 __U
, __m256i __A
, __m256i __B
)
671 return (__m256i
) __builtin_ia32_psubd256_mask ((__v8si
) __A
,
674 _mm256_setzero_si256 (),
678 static __inline__ __m256i __DEFAULT_FN_ATTRS
679 _mm256_mask_sub_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
682 return (__m256i
) __builtin_ia32_psubq256_mask ((__v4di
) __A
,
688 static __inline__ __m256i __DEFAULT_FN_ATTRS
689 _mm256_maskz_sub_epi64 (__mmask8 __U
, __m256i __A
, __m256i __B
)
691 return (__m256i
) __builtin_ia32_psubq256_mask ((__v4di
) __A
,
694 _mm256_setzero_si256 (),
698 static __inline__ __m128i __DEFAULT_FN_ATTRS
699 _mm_mask_add_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
702 return (__m128i
) __builtin_ia32_paddd128_mask ((__v4si
) __A
,
708 static __inline__ __m128i __DEFAULT_FN_ATTRS
709 _mm_maskz_add_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
711 return (__m128i
) __builtin_ia32_paddd128_mask ((__v4si
) __A
,
714 _mm_setzero_si128 (),
718 static __inline__ __m128i __DEFAULT_FN_ATTRS
719 _mm_mask_add_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
722 return (__m128i
) __builtin_ia32_paddq128_mask ((__v2di
) __A
,
728 static __inline__ __m128i __DEFAULT_FN_ATTRS
729 _mm_maskz_add_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
731 return (__m128i
) __builtin_ia32_paddq128_mask ((__v2di
) __A
,
734 _mm_setzero_si128 (),
738 static __inline__ __m128i __DEFAULT_FN_ATTRS
739 _mm_mask_sub_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
742 return (__m128i
) __builtin_ia32_psubd128_mask ((__v4si
) __A
,
748 static __inline__ __m128i __DEFAULT_FN_ATTRS
749 _mm_maskz_sub_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
751 return (__m128i
) __builtin_ia32_psubd128_mask ((__v4si
) __A
,
754 _mm_setzero_si128 (),
758 static __inline__ __m128i __DEFAULT_FN_ATTRS
759 _mm_mask_sub_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
762 return (__m128i
) __builtin_ia32_psubq128_mask ((__v2di
) __A
,
768 static __inline__ __m128i __DEFAULT_FN_ATTRS
769 _mm_maskz_sub_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
771 return (__m128i
) __builtin_ia32_psubq128_mask ((__v2di
) __A
,
774 _mm_setzero_si128 (),
778 static __inline__ __m256i __DEFAULT_FN_ATTRS
779 _mm256_mask_mul_epi32 (__m256i __W
, __mmask8 __M
, __m256i __X
,
782 return (__m256i
) __builtin_ia32_pmuldq256_mask ((__v8si
) __X
,
787 static __inline__ __m256i __DEFAULT_FN_ATTRS
788 _mm256_maskz_mul_epi32 (__mmask8 __M
, __m256i __X
, __m256i __Y
)
790 return (__m256i
) __builtin_ia32_pmuldq256_mask ((__v8si
) __X
,
793 _mm256_setzero_si256 (),
797 static __inline__ __m128i __DEFAULT_FN_ATTRS
798 _mm_mask_mul_epi32 (__m128i __W
, __mmask8 __M
, __m128i __X
,
801 return (__m128i
) __builtin_ia32_pmuldq128_mask ((__v4si
) __X
,
806 static __inline__ __m128i __DEFAULT_FN_ATTRS
807 _mm_maskz_mul_epi32 (__mmask8 __M
, __m128i __X
, __m128i __Y
)
809 return (__m128i
) __builtin_ia32_pmuldq128_mask ((__v4si
) __X
,
812 _mm_setzero_si128 (),
816 static __inline__ __m256i __DEFAULT_FN_ATTRS
817 _mm256_mask_mul_epu32 (__m256i __W
, __mmask8 __M
, __m256i __X
,
820 return (__m256i
) __builtin_ia32_pmuludq256_mask ((__v8si
) __X
,
825 static __inline__ __m256i __DEFAULT_FN_ATTRS
826 _mm256_maskz_mul_epu32 (__mmask8 __M
, __m256i __X
, __m256i __Y
)
828 return (__m256i
) __builtin_ia32_pmuludq256_mask ((__v8si
) __X
,
831 _mm256_setzero_si256 (),
835 static __inline__ __m128i __DEFAULT_FN_ATTRS
836 _mm_mask_mul_epu32 (__m128i __W
, __mmask8 __M
, __m128i __X
,
839 return (__m128i
) __builtin_ia32_pmuludq128_mask ((__v4si
) __X
,
844 static __inline__ __m128i __DEFAULT_FN_ATTRS
845 _mm_maskz_mul_epu32 (__mmask8 __M
, __m128i __X
, __m128i __Y
)
847 return (__m128i
) __builtin_ia32_pmuludq128_mask ((__v4si
) __X
,
850 _mm_setzero_si128 (),
854 static __inline__ __m256i __DEFAULT_FN_ATTRS
855 _mm256_maskz_mullo_epi32 (__mmask8 __M
, __m256i __A
, __m256i __B
)
857 return (__m256i
) __builtin_ia32_pmulld256_mask ((__v8si
) __A
,
860 _mm256_setzero_si256 (),
864 static __inline__ __m256i __DEFAULT_FN_ATTRS
865 _mm256_mask_mullo_epi32 (__m256i __W
, __mmask8 __M
, __m256i __A
,
868 return (__m256i
) __builtin_ia32_pmulld256_mask ((__v8si
) __A
,
873 static __inline__ __m128i __DEFAULT_FN_ATTRS
874 _mm_maskz_mullo_epi32 (__mmask8 __M
, __m128i __A
, __m128i __B
)
876 return (__m128i
) __builtin_ia32_pmulld128_mask ((__v4si
) __A
,
879 _mm_setzero_si128 (),
/* 128-bit masked mullo_epi32 wrapper around __builtin_ia32_pmulld128_mask.
 * The mask parameter __M is declared __mmask8, in line with
 * _mm_maskz_mullo_epi32 and _mm256_mask_mullo_epi32 in this header.
 * NOTE(review): this listing is missing the remainder of the definition
 * (the last parameter, the trailing builtin operands and the closing brace);
 * only the visible lines are reproduced here -- restore the rest from the
 * pristine header.
 */
883 static __inline__ __m128i __DEFAULT_FN_ATTRS
884 _mm_mask_mullo_epi32 (__m128i __W
, __mmask8 __M
, __m128i __A
,
887 return (__m128i
) __builtin_ia32_pmulld128_mask ((__v4si
) __A
,
892 static __inline__ __m256i __DEFAULT_FN_ATTRS
893 _mm256_mask_and_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
895 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
896 (__v8si
)_mm256_and_si256(__A
, __B
),
900 static __inline__ __m256i __DEFAULT_FN_ATTRS
901 _mm256_maskz_and_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
903 return (__m256i
)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U
, __A
, __B
);
906 static __inline__ __m128i __DEFAULT_FN_ATTRS
907 _mm_mask_and_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
909 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
910 (__v4si
)_mm_and_si128(__A
, __B
),
914 static __inline__ __m128i __DEFAULT_FN_ATTRS
915 _mm_maskz_and_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
917 return (__m128i
)_mm_mask_and_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
920 static __inline__ __m256i __DEFAULT_FN_ATTRS
921 _mm256_mask_andnot_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
923 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
924 (__v8si
)_mm256_andnot_si256(__A
, __B
),
928 static __inline__ __m256i __DEFAULT_FN_ATTRS
929 _mm256_maskz_andnot_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
931 return (__m256i
)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
935 static __inline__ __m128i __DEFAULT_FN_ATTRS
936 _mm_mask_andnot_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
938 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
939 (__v4si
)_mm_andnot_si128(__A
, __B
),
943 static __inline__ __m128i __DEFAULT_FN_ATTRS
944 _mm_maskz_andnot_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
946 return (__m128i
)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
949 static __inline__ __m256i __DEFAULT_FN_ATTRS
950 _mm256_mask_or_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
952 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
953 (__v8si
)_mm256_or_si256(__A
, __B
),
957 static __inline__ __m256i __DEFAULT_FN_ATTRS
958 _mm256_maskz_or_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
960 return (__m256i
)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U
, __A
, __B
);
963 static __inline__ __m128i __DEFAULT_FN_ATTRS
964 _mm_mask_or_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
966 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
967 (__v4si
)_mm_or_si128(__A
, __B
),
971 static __inline__ __m128i __DEFAULT_FN_ATTRS
972 _mm_maskz_or_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
974 return (__m128i
)_mm_mask_or_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
977 static __inline__ __m256i __DEFAULT_FN_ATTRS
978 _mm256_mask_xor_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
980 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
981 (__v8si
)_mm256_xor_si256(__A
, __B
),
985 static __inline__ __m256i __DEFAULT_FN_ATTRS
986 _mm256_maskz_xor_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
988 return (__m256i
)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U
, __A
, __B
);
991 static __inline__ __m128i __DEFAULT_FN_ATTRS
992 _mm_mask_xor_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
,
995 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
996 (__v4si
)_mm_xor_si128(__A
, __B
),
1000 static __inline__ __m128i __DEFAULT_FN_ATTRS
1001 _mm_maskz_xor_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
1003 return (__m128i
)_mm_mask_xor_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
1006 static __inline__ __m256i __DEFAULT_FN_ATTRS
1007 _mm256_mask_and_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
1009 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
1010 (__v4di
)_mm256_and_si256(__A
, __B
),
1014 static __inline__ __m256i __DEFAULT_FN_ATTRS
1015 _mm256_maskz_and_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
1017 return (__m256i
)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U
, __A
, __B
);
1020 static __inline__ __m128i __DEFAULT_FN_ATTRS
1021 _mm_mask_and_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1023 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
1024 (__v2di
)_mm_and_si128(__A
, __B
),
1028 static __inline__ __m128i __DEFAULT_FN_ATTRS
1029 _mm_maskz_and_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
1031 return (__m128i
)_mm_mask_and_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
1034 static __inline__ __m256i __DEFAULT_FN_ATTRS
1035 _mm256_mask_andnot_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
1037 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
1038 (__v4di
)_mm256_andnot_si256(__A
, __B
),
1042 static __inline__ __m256i __DEFAULT_FN_ATTRS
1043 _mm256_maskz_andnot_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
1045 return (__m256i
)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
1049 static __inline__ __m128i __DEFAULT_FN_ATTRS
1050 _mm_mask_andnot_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1052 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
1053 (__v2di
)_mm_andnot_si128(__A
, __B
),
1057 static __inline__ __m128i __DEFAULT_FN_ATTRS
1058 _mm_maskz_andnot_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
1060 return (__m128i
)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
/* Masked bitwise OR on 64-bit lanes, 256-bit and 128-bit forms.
 *
 * _mask_ variants: pass (__mmask8)__U and the result of _mm256_or_si256 /
 * _mm_or_si128 (__A, __B) to __builtin_ia32_selectq_256 / _128; the final
 * select operand is not visible here (presumably __W -- confirm).
 * _maskz_ variants: forward to the _mask_ variant with an all-zero vector in
 * the __W position.
 *
 * NOTE(review): braces and trailing operands are missing from this excerpt. */
1063 static __inline__ __m256i __DEFAULT_FN_ATTRS
1064 _mm256_mask_or_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
1066 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
1067 (__v4di
)_mm256_or_si256(__A
, __B
),
1071 static __inline__ __m256i __DEFAULT_FN_ATTRS
1072 _mm256_maskz_or_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
1074 return (__m256i
)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U
, __A
, __B
);
1077 static __inline__ __m128i __DEFAULT_FN_ATTRS
1078 _mm_mask_or_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1080 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
1081 (__v2di
)_mm_or_si128(__A
, __B
),
1085 static __inline__ __m128i __DEFAULT_FN_ATTRS
1086 _mm_maskz_or_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
1088 return (__m128i
)_mm_mask_or_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
/* Masked bitwise XOR on 64-bit lanes, 256-bit and 128-bit forms.
 *
 * _mask_ variants: pass (__mmask8)__U and the result of _mm256_xor_si256 /
 * _mm_xor_si128 (__A, __B) to __builtin_ia32_selectq_256 / _128; the final
 * select operand is not visible here (presumably __W -- confirm).
 * _maskz_ variants: forward to the _mask_ variant with an all-zero vector in
 * the __W position.
 *
 * NOTE(review): in this excerpt the parameter list of _mm_mask_xor_epi64 is
 * cut off after __A, although its body references __B. */
1091 static __inline__ __m256i __DEFAULT_FN_ATTRS
1092 _mm256_mask_xor_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
1094 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
1095 (__v4di
)_mm256_xor_si256(__A
, __B
),
1099 static __inline__ __m256i __DEFAULT_FN_ATTRS
1100 _mm256_maskz_xor_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
1102 return (__m256i
)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U
, __A
, __B
);
1105 static __inline__ __m128i __DEFAULT_FN_ATTRS
1106 _mm_mask_xor_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
,
1109 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
1110 (__v2di
)_mm_xor_si128(__A
, __B
),
1114 static __inline__ __m128i __DEFAULT_FN_ATTRS
1115 _mm_maskz_xor_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
1117 return (__m128i
)_mm_mask_xor_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
/* Generic comparison macros producing an __mmask8.
 *
 * Each macro casts operands a and b to the element vector type of the
 * underlying builtin, casts the predicate p to int, and calls:
 *   epi32 -> __builtin_ia32_cmpd{128,256}_mask   epu32 -> ..._ucmpd{128,256}_mask
 *   epi64 -> __builtin_ia32_cmpq{128,256}_mask   epu64 -> ..._ucmpq{128,256}_mask
 *   ps    -> __builtin_ia32_cmpps{128,256}_mask  pd    -> ..._cmppd{128,256}_mask
 * The _mask_cmp_ forms take an extra leading argument m.
 *
 * NOTE(review): the last line of every macro (the final builtin operand and
 * the closing "})") is missing from this excerpt, so how m is consumed is not
 * visible here -- confirm against the full header. */
1120 #define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
1121 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
1122 (__v4si)(__m128i)(b), (int)(p), \
1125 #define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
1126 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
1127 (__v4si)(__m128i)(b), (int)(p), \
1130 #define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
1131 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
1132 (__v4si)(__m128i)(b), (int)(p), \
1135 #define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
1136 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
1137 (__v4si)(__m128i)(b), (int)(p), \
1140 #define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
1141 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
1142 (__v8si)(__m256i)(b), (int)(p), \
1145 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
1146 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
1147 (__v8si)(__m256i)(b), (int)(p), \
1150 #define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
1151 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
1152 (__v8si)(__m256i)(b), (int)(p), \
1155 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
1156 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
1157 (__v8si)(__m256i)(b), (int)(p), \
1160 #define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
1161 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
1162 (__v2di)(__m128i)(b), (int)(p), \
1165 #define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
1166 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
1167 (__v2di)(__m128i)(b), (int)(p), \
1170 #define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
1171 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
1172 (__v2di)(__m128i)(b), (int)(p), \
1175 #define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
1176 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
1177 (__v2di)(__m128i)(b), (int)(p), \
1180 #define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
1181 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
1182 (__v4di)(__m256i)(b), (int)(p), \
1185 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
1186 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
1187 (__v4di)(__m256i)(b), (int)(p), \
1190 #define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
1191 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
1192 (__v4di)(__m256i)(b), (int)(p), \
1195 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
1196 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
1197 (__v4di)(__m256i)(b), (int)(p), \
1200 #define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
1201 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
1202 (__v8sf)(__m256)(b), (int)(p), \
1205 #define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
1206 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
1207 (__v8sf)(__m256)(b), (int)(p), \
1210 #define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
1211 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
1212 (__v4df)(__m256d)(b), (int)(p), \
1215 #define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
1216 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
1217 (__v4df)(__m256d)(b), (int)(p), \
1220 #define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
1221 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
1222 (__v4sf)(__m128)(b), (int)(p), \
1225 #define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
1226 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
1227 (__v4sf)(__m128)(b), (int)(p), \
1230 #define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
1231 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
1232 (__v2df)(__m128d)(b), (int)(p), \
1235 #define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
1236 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
1237 (__v2df)(__m128d)(b), (int)(p), \
/* Masked fused multiply-add family, 128-bit double precision (__v2df).
 *
 * Parameter order by variant: _mask_ (__A, __U, __B, __C),
 * _mask3_ (__A, __B, __C, __U), _maskz_ (__U, __A, __B, __C).
 * Every fragment calls __builtin_ia32_vfmaddpd128_{mask,mask3,maskz}; the
 * fmsub, fnmadd and fnmsub fragments reuse those same vfmadd builtins.
 * In the fnmadd/fnmsub fragments the first builtin operand is -(__v2df)__A;
 * in the fmadd/fmsub fragments it is (__v2df)__A unchanged.
 *
 * NOTE(review): only the first builtin operand is visible in this excerpt;
 * how __B, __C and __U are passed (e.g. whether __C is negated for the
 * *sub forms) must be confirmed against the full header. */
1240 static __inline__ __m128d __DEFAULT_FN_ATTRS
1241 _mm_mask_fmadd_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1243 return (__m128d
) __builtin_ia32_vfmaddpd128_mask ((__v2df
) __A
,
1249 static __inline__ __m128d __DEFAULT_FN_ATTRS
1250 _mm_mask3_fmadd_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1252 return (__m128d
) __builtin_ia32_vfmaddpd128_mask3 ((__v2df
) __A
,
1258 static __inline__ __m128d __DEFAULT_FN_ATTRS
1259 _mm_maskz_fmadd_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1261 return (__m128d
) __builtin_ia32_vfmaddpd128_maskz ((__v2df
) __A
,
1267 static __inline__ __m128d __DEFAULT_FN_ATTRS
1268 _mm_mask_fmsub_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1270 return (__m128d
) __builtin_ia32_vfmaddpd128_mask ((__v2df
) __A
,
1276 static __inline__ __m128d __DEFAULT_FN_ATTRS
1277 _mm_maskz_fmsub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1279 return (__m128d
) __builtin_ia32_vfmaddpd128_maskz ((__v2df
) __A
,
1285 static __inline__ __m128d __DEFAULT_FN_ATTRS
1286 _mm_mask3_fnmadd_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1288 return (__m128d
) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df
) __A
,
1294 static __inline__ __m128d __DEFAULT_FN_ATTRS
1295 _mm_maskz_fnmadd_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1297 return (__m128d
) __builtin_ia32_vfmaddpd128_maskz (-(__v2df
) __A
,
1303 static __inline__ __m128d __DEFAULT_FN_ATTRS
1304 _mm_maskz_fnmsub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1306 return (__m128d
) __builtin_ia32_vfmaddpd128_maskz (-(__v2df
) __A
,
/* Masked fused multiply-add family, 256-bit double precision (__v4df).
 *
 * Parameter order by variant: _mask_ (__A, __U, __B, __C),
 * _mask3_ (__A, __B, __C, __U), _maskz_ (__U, __A, __B, __C).
 * Every fragment calls __builtin_ia32_vfmaddpd256_{mask,mask3,maskz}; the
 * fmsub, fnmadd and fnmsub fragments reuse those same vfmadd builtins.
 * In the fnmadd/fnmsub fragments the first builtin operand is -(__v4df)__A;
 * in the fmadd/fmsub fragments it is (__v4df)__A unchanged.
 *
 * NOTE(review): only the first builtin operand is visible in this excerpt;
 * the handling of __B, __C and __U must be confirmed against the full header. */
1312 static __inline__ __m256d __DEFAULT_FN_ATTRS
1313 _mm256_mask_fmadd_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1315 return (__m256d
) __builtin_ia32_vfmaddpd256_mask ((__v4df
) __A
,
1321 static __inline__ __m256d __DEFAULT_FN_ATTRS
1322 _mm256_mask3_fmadd_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1324 return (__m256d
) __builtin_ia32_vfmaddpd256_mask3 ((__v4df
) __A
,
1330 static __inline__ __m256d __DEFAULT_FN_ATTRS
1331 _mm256_maskz_fmadd_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1333 return (__m256d
) __builtin_ia32_vfmaddpd256_maskz ((__v4df
) __A
,
1339 static __inline__ __m256d __DEFAULT_FN_ATTRS
1340 _mm256_mask_fmsub_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1342 return (__m256d
) __builtin_ia32_vfmaddpd256_mask ((__v4df
) __A
,
1348 static __inline__ __m256d __DEFAULT_FN_ATTRS
1349 _mm256_maskz_fmsub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1351 return (__m256d
) __builtin_ia32_vfmaddpd256_maskz ((__v4df
) __A
,
1357 static __inline__ __m256d __DEFAULT_FN_ATTRS
1358 _mm256_mask3_fnmadd_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1360 return (__m256d
) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df
) __A
,
1366 static __inline__ __m256d __DEFAULT_FN_ATTRS
1367 _mm256_maskz_fnmadd_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1369 return (__m256d
) __builtin_ia32_vfmaddpd256_maskz (-(__v4df
) __A
,
1375 static __inline__ __m256d __DEFAULT_FN_ATTRS
1376 _mm256_maskz_fnmsub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1378 return (__m256d
) __builtin_ia32_vfmaddpd256_maskz (-(__v4df
) __A
,
/* Masked fused multiply-add family, 128-bit single precision (__v4sf).
 *
 * Parameter order by variant: _mask_ (__A, __U, __B, __C),
 * _mask3_ (__A, __B, __C, __U), _maskz_ (__U, __A, __B, __C).
 * Every fragment calls __builtin_ia32_vfmaddps128_{mask,mask3,maskz}; the
 * fmsub, fnmadd and fnmsub fragments reuse those same vfmadd builtins.
 * In the fnmadd/fnmsub fragments the first builtin operand is -(__v4sf)__A;
 * in the fmadd/fmsub fragments it is (__v4sf)__A unchanged.
 *
 * NOTE(review): only the first builtin operand is visible in this excerpt;
 * the handling of __B, __C and __U must be confirmed against the full header. */
1384 static __inline__ __m128 __DEFAULT_FN_ATTRS
1385 _mm_mask_fmadd_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1387 return (__m128
) __builtin_ia32_vfmaddps128_mask ((__v4sf
) __A
,
1393 static __inline__ __m128 __DEFAULT_FN_ATTRS
1394 _mm_mask3_fmadd_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1396 return (__m128
) __builtin_ia32_vfmaddps128_mask3 ((__v4sf
) __A
,
1402 static __inline__ __m128 __DEFAULT_FN_ATTRS
1403 _mm_maskz_fmadd_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1405 return (__m128
) __builtin_ia32_vfmaddps128_maskz ((__v4sf
) __A
,
1411 static __inline__ __m128 __DEFAULT_FN_ATTRS
1412 _mm_mask_fmsub_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1414 return (__m128
) __builtin_ia32_vfmaddps128_mask ((__v4sf
) __A
,
1420 static __inline__ __m128 __DEFAULT_FN_ATTRS
1421 _mm_maskz_fmsub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1423 return (__m128
) __builtin_ia32_vfmaddps128_maskz ((__v4sf
) __A
,
1429 static __inline__ __m128 __DEFAULT_FN_ATTRS
1430 _mm_mask3_fnmadd_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1432 return (__m128
) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf
) __A
,
1438 static __inline__ __m128 __DEFAULT_FN_ATTRS
1439 _mm_maskz_fnmadd_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1441 return (__m128
) __builtin_ia32_vfmaddps128_maskz (-(__v4sf
) __A
,
1447 static __inline__ __m128 __DEFAULT_FN_ATTRS
1448 _mm_maskz_fnmsub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1450 return (__m128
) __builtin_ia32_vfmaddps128_maskz (-(__v4sf
) __A
,
/* Masked fused multiply-add family, 256-bit single precision (__v8sf).
 *
 * Parameter order by variant: _mask_ (__A, __U, __B, __C),
 * _mask3_ (__A, __B, __C, __U), _maskz_ (__U, __A, __B, __C).
 * Every fragment calls __builtin_ia32_vfmaddps256_{mask,mask3,maskz}; the
 * fmsub, fnmadd and fnmsub fragments reuse those same vfmadd builtins.
 * In the fnmadd/fnmsub fragments the first builtin operand is -(__v8sf)__A;
 * in the fmadd/fmsub fragments it is (__v8sf)__A unchanged.
 *
 * NOTE(review): only the first builtin operand is visible in this excerpt;
 * the handling of __B, __C and __U must be confirmed against the full header. */
1456 static __inline__ __m256 __DEFAULT_FN_ATTRS
1457 _mm256_mask_fmadd_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1459 return (__m256
) __builtin_ia32_vfmaddps256_mask ((__v8sf
) __A
,
1465 static __inline__ __m256 __DEFAULT_FN_ATTRS
1466 _mm256_mask3_fmadd_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1468 return (__m256
) __builtin_ia32_vfmaddps256_mask3 ((__v8sf
) __A
,
1474 static __inline__ __m256 __DEFAULT_FN_ATTRS
1475 _mm256_maskz_fmadd_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1477 return (__m256
) __builtin_ia32_vfmaddps256_maskz ((__v8sf
) __A
,
1483 static __inline__ __m256 __DEFAULT_FN_ATTRS
1484 _mm256_mask_fmsub_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1486 return (__m256
) __builtin_ia32_vfmaddps256_mask ((__v8sf
) __A
,
1492 static __inline__ __m256 __DEFAULT_FN_ATTRS
1493 _mm256_maskz_fmsub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1495 return (__m256
) __builtin_ia32_vfmaddps256_maskz ((__v8sf
) __A
,
1501 static __inline__ __m256 __DEFAULT_FN_ATTRS
1502 _mm256_mask3_fnmadd_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1504 return (__m256
) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf
) __A
,
1510 static __inline__ __m256 __DEFAULT_FN_ATTRS
1511 _mm256_maskz_fnmadd_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1513 return (__m256
) __builtin_ia32_vfmaddps256_maskz (-(__v8sf
) __A
,
1519 static __inline__ __m256 __DEFAULT_FN_ATTRS
1520 _mm256_maskz_fnmsub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1522 return (__m256
) __builtin_ia32_vfmaddps256_maskz (-(__v8sf
) __A
,
/* Masked fmaddsub / fmsubadd, 128-bit double precision (__v2df).
 *
 * All five fragments call __builtin_ia32_vfmaddsubpd128_{mask,mask3,maskz}
 * with (__v2df)__A as the first operand; the fmsubadd fragments reuse the
 * vfmaddsub builtins rather than a separate one.
 *
 * NOTE(review): the remaining operands are cut off in this excerpt, so how
 * fmsubadd differs from fmaddsub (presumably a negated __C) is not visible
 * here -- confirm against the full header. */
1528 static __inline__ __m128d __DEFAULT_FN_ATTRS
1529 _mm_mask_fmaddsub_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1531 return (__m128d
) __builtin_ia32_vfmaddsubpd128_mask ((__v2df
) __A
,
1537 static __inline__ __m128d __DEFAULT_FN_ATTRS
1538 _mm_mask3_fmaddsub_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1540 return (__m128d
) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df
) __A
,
1547 static __inline__ __m128d __DEFAULT_FN_ATTRS
1548 _mm_maskz_fmaddsub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1550 return (__m128d
) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df
) __A
,
1557 static __inline__ __m128d __DEFAULT_FN_ATTRS
1558 _mm_mask_fmsubadd_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1560 return (__m128d
) __builtin_ia32_vfmaddsubpd128_mask ((__v2df
) __A
,
1566 static __inline__ __m128d __DEFAULT_FN_ATTRS
1567 _mm_maskz_fmsubadd_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1569 return (__m128d
) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df
) __A
,
/* Masked fmaddsub / fmsubadd, 256-bit double precision (__v4df).
 *
 * All five fragments call __builtin_ia32_vfmaddsubpd256_{mask,mask3,maskz}
 * with (__v4df)__A as the first operand; the fmsubadd fragments reuse the
 * vfmaddsub builtins rather than a separate one.
 *
 * NOTE(review): the remaining operands are cut off in this excerpt, so how
 * fmsubadd differs from fmaddsub is not visible here -- confirm against the
 * full header. */
1576 static __inline__ __m256d __DEFAULT_FN_ATTRS
1577 _mm256_mask_fmaddsub_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1579 return (__m256d
) __builtin_ia32_vfmaddsubpd256_mask ((__v4df
) __A
,
1585 static __inline__ __m256d __DEFAULT_FN_ATTRS
1586 _mm256_mask3_fmaddsub_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1588 return (__m256d
) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df
) __A
,
1595 static __inline__ __m256d __DEFAULT_FN_ATTRS
1596 _mm256_maskz_fmaddsub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1598 return (__m256d
) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df
) __A
,
1605 static __inline__ __m256d __DEFAULT_FN_ATTRS
1606 _mm256_mask_fmsubadd_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1608 return (__m256d
) __builtin_ia32_vfmaddsubpd256_mask ((__v4df
) __A
,
1614 static __inline__ __m256d __DEFAULT_FN_ATTRS
1615 _mm256_maskz_fmsubadd_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1617 return (__m256d
) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df
) __A
,
/* Masked fmaddsub / fmsubadd, 128-bit single precision (__v4sf).
 *
 * All five fragments call __builtin_ia32_vfmaddsubps128_{mask,mask3,maskz}
 * with (__v4sf)__A as the first operand; the fmsubadd fragments reuse the
 * vfmaddsub builtins rather than a separate one.
 *
 * NOTE(review): the remaining operands are cut off in this excerpt, so how
 * fmsubadd differs from fmaddsub is not visible here -- confirm against the
 * full header. */
1624 static __inline__ __m128 __DEFAULT_FN_ATTRS
1625 _mm_mask_fmaddsub_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1627 return (__m128
) __builtin_ia32_vfmaddsubps128_mask ((__v4sf
) __A
,
1633 static __inline__ __m128 __DEFAULT_FN_ATTRS
1634 _mm_mask3_fmaddsub_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1636 return (__m128
) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf
) __A
,
1642 static __inline__ __m128 __DEFAULT_FN_ATTRS
1643 _mm_maskz_fmaddsub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1645 return (__m128
) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf
) __A
,
1651 static __inline__ __m128 __DEFAULT_FN_ATTRS
1652 _mm_mask_fmsubadd_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1654 return (__m128
) __builtin_ia32_vfmaddsubps128_mask ((__v4sf
) __A
,
1660 static __inline__ __m128 __DEFAULT_FN_ATTRS
1661 _mm_maskz_fmsubadd_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1663 return (__m128
) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf
) __A
,
/* Masked fmaddsub / fmsubadd, 256-bit single precision (__v8sf).
 *
 * All five fragments call __builtin_ia32_vfmaddsubps256_{mask,mask3,maskz}
 * with (__v8sf)__A as the first operand; the fmsubadd fragments reuse the
 * vfmaddsub builtins rather than a separate one.
 *
 * NOTE(review): in this excerpt the parameter list of _mm256_mask_fmaddsub_ps
 * is cut off after __B, and the trailing builtin operands of every fragment
 * are missing -- confirm against the full header. */
1669 static __inline__ __m256 __DEFAULT_FN_ATTRS
1670 _mm256_mask_fmaddsub_ps(__m256 __A
, __mmask8 __U
, __m256 __B
,
1673 return (__m256
) __builtin_ia32_vfmaddsubps256_mask ((__v8sf
) __A
,
1679 static __inline__ __m256 __DEFAULT_FN_ATTRS
1680 _mm256_mask3_fmaddsub_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1682 return (__m256
) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf
) __A
,
1688 static __inline__ __m256 __DEFAULT_FN_ATTRS
1689 _mm256_maskz_fmaddsub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1691 return (__m256
) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf
) __A
,
1697 static __inline__ __m256 __DEFAULT_FN_ATTRS
1698 _mm256_mask_fmsubadd_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1700 return (__m256
) __builtin_ia32_vfmaddsubps256_mask ((__v8sf
) __A
,
1706 static __inline__ __m256 __DEFAULT_FN_ATTRS
1707 _mm256_maskz_fmsubadd_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1709 return (__m256
) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf
) __A
,
/* _mask3_ fused multiply-subtract for pd/ps at 128 and 256 bits.
 *
 * Parameters are (__A, __B, __C, __U).  Unlike the _mask_/_maskz_ fmsub
 * fragments, these call dedicated __builtin_ia32_vfmsub{pd,ps}{128,256}_mask3
 * builtins, passing __A (cast to the element vector type) first.
 *
 * NOTE(review): the remaining builtin operands are missing from this excerpt. */
1715 static __inline__ __m128d __DEFAULT_FN_ATTRS
1716 _mm_mask3_fmsub_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1718 return (__m128d
) __builtin_ia32_vfmsubpd128_mask3 ((__v2df
) __A
,
1724 static __inline__ __m256d __DEFAULT_FN_ATTRS
1725 _mm256_mask3_fmsub_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1727 return (__m256d
) __builtin_ia32_vfmsubpd256_mask3 ((__v4df
) __A
,
1733 static __inline__ __m128 __DEFAULT_FN_ATTRS
1734 _mm_mask3_fmsub_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1736 return (__m128
) __builtin_ia32_vfmsubps128_mask3 ((__v4sf
) __A
,
1742 static __inline__ __m256 __DEFAULT_FN_ATTRS
1743 _mm256_mask3_fmsub_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1745 return (__m256
) __builtin_ia32_vfmsubps256_mask3 ((__v8sf
) __A
,
/* _mask3_ fmsubadd for pd/ps at 128 and 256 bits.
 *
 * Parameters are (__A, __B, __C, __U).  These call dedicated
 * __builtin_ia32_vfmsubadd{pd,ps}{128,256}_mask3 builtins, passing __A (cast
 * to the element vector type) first.
 *
 * NOTE(review): the remaining builtin operands are missing from this excerpt. */
1751 static __inline__ __m128d __DEFAULT_FN_ATTRS
1752 _mm_mask3_fmsubadd_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1754 return (__m128d
) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df
) __A
,
1761 static __inline__ __m256d __DEFAULT_FN_ATTRS
1762 _mm256_mask3_fmsubadd_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1764 return (__m256d
) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df
) __A
,
1771 static __inline__ __m128 __DEFAULT_FN_ATTRS
1772 _mm_mask3_fmsubadd_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1774 return (__m128
) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf
) __A
,
1780 static __inline__ __m256 __DEFAULT_FN_ATTRS
1781 _mm256_mask3_fmsubadd_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1783 return (__m256
) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf
) __A
,
/* _mask_ negated fused multiply-add for pd/ps at 128 and 256 bits.
 *
 * Parameters are (__A, __U, __B, __C).  These call dedicated
 * __builtin_ia32_vfnmadd{pd,ps}{128,256}_mask builtins with __A passed
 * un-negated as the first operand (contrast with the _mask3_/_maskz_ fnmadd
 * fragments elsewhere in this file, which negate __A and call vfmadd).
 *
 * NOTE(review): the remaining builtin operands are missing from this excerpt. */
1789 static __inline__ __m128d __DEFAULT_FN_ATTRS
1790 _mm_mask_fnmadd_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1792 return (__m128d
) __builtin_ia32_vfnmaddpd128_mask ((__v2df
) __A
,
1798 static __inline__ __m256d __DEFAULT_FN_ATTRS
1799 _mm256_mask_fnmadd_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1801 return (__m256d
) __builtin_ia32_vfnmaddpd256_mask ((__v4df
) __A
,
1807 static __inline__ __m128 __DEFAULT_FN_ATTRS
1808 _mm_mask_fnmadd_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1810 return (__m128
) __builtin_ia32_vfnmaddps128_mask ((__v4sf
) __A
,
1816 static __inline__ __m256 __DEFAULT_FN_ATTRS
1817 _mm256_mask_fnmadd_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1819 return (__m256
) __builtin_ia32_vfnmaddps256_mask ((__v8sf
) __A
,
/* _mask_ and _mask3_ negated fused multiply-subtract for pd/ps at 128 and
 * 256 bits.
 *
 * _mask_ fragments take (__A, __U, __B, __C) and call
 * __builtin_ia32_vfnmsub{pd,ps}{128,256}_mask; _mask3_ fragments take
 * (__A, __B, __C, __U) and call the matching ..._mask3 builtin.  In both,
 * __A is passed un-negated as the first operand.
 *
 * NOTE(review): the remaining builtin operands are missing from this excerpt. */
1825 static __inline__ __m128d __DEFAULT_FN_ATTRS
1826 _mm_mask_fnmsub_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1828 return (__m128d
) __builtin_ia32_vfnmsubpd128_mask ((__v2df
) __A
,
1834 static __inline__ __m128d __DEFAULT_FN_ATTRS
1835 _mm_mask3_fnmsub_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1837 return (__m128d
) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df
) __A
,
1843 static __inline__ __m256d __DEFAULT_FN_ATTRS
1844 _mm256_mask_fnmsub_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1846 return (__m256d
) __builtin_ia32_vfnmsubpd256_mask ((__v4df
) __A
,
1852 static __inline__ __m256d __DEFAULT_FN_ATTRS
1853 _mm256_mask3_fnmsub_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1855 return (__m256d
) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df
) __A
,
1861 static __inline__ __m128 __DEFAULT_FN_ATTRS
1862 _mm_mask_fnmsub_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1864 return (__m128
) __builtin_ia32_vfnmsubps128_mask ((__v4sf
) __A
,
1870 static __inline__ __m128 __DEFAULT_FN_ATTRS
1871 _mm_mask3_fnmsub_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1873 return (__m128
) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf
) __A
,
1879 static __inline__ __m256 __DEFAULT_FN_ATTRS
1880 _mm256_mask_fnmsub_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1882 return (__m256
) __builtin_ia32_vfnmsubps256_mask ((__v8sf
) __A
,
1888 static __inline__ __m256 __DEFAULT_FN_ATTRS
1889 _mm256_mask3_fnmsub_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1891 return (__m256
) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf
) __A
,
/* Masked packed double-precision add, 128-bit and 256-bit forms.
 *
 * Both the _mask_ (__W, __U, __A, __B) and _maskz_ (__U, __A, __B) fragments
 * call __builtin_ia32_addpd{128,256}_mask with __A as the first operand.
 * The 256-bit _maskz_ fragment visibly passes _mm256_setzero_pd() as a later
 * operand.
 *
 * NOTE(review): the other trailing operands are missing from this excerpt;
 * presumably the _mask_ forms pass __W where the _maskz_ forms pass zero --
 * confirm against the full header. */
1897 static __inline__ __m128d __DEFAULT_FN_ATTRS
1898 _mm_mask_add_pd (__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
1899 return (__m128d
) __builtin_ia32_addpd128_mask ((__v2df
) __A
,
1905 static __inline__ __m128d __DEFAULT_FN_ATTRS
1906 _mm_maskz_add_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
1907 return (__m128d
) __builtin_ia32_addpd128_mask ((__v2df
) __A
,
1914 static __inline__ __m256d __DEFAULT_FN_ATTRS
1915 _mm256_mask_add_pd (__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
1916 return (__m256d
) __builtin_ia32_addpd256_mask ((__v4df
) __A
,
1922 static __inline__ __m256d __DEFAULT_FN_ATTRS
1923 _mm256_maskz_add_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
1924 return (__m256d
) __builtin_ia32_addpd256_mask ((__v4df
) __A
,
1927 _mm256_setzero_pd (),
/* Masked packed single-precision add, 128-bit and 256-bit forms.
 *
 * Both the _mask_ (__W, __U, __A, __B) and _maskz_ (__U, __A, __B) fragments
 * call __builtin_ia32_addps{128,256}_mask with __A as the first operand.
 * The 256-bit _maskz_ fragment visibly passes _mm256_setzero_ps() as a later
 * operand.
 *
 * NOTE(review): all four functions declare the mask as __mmask16, whereas the
 * add_pd siblings and the other 128/256-bit intrinsics in this file take
 * __mmask8, and the operands here are __v4sf / __v8sf.  This looks like an
 * inconsistency -- confirm the intended mask type against the Intel
 * Intrinsics Guide before changing the public signature.
 * NOTE(review): the trailing builtin operands are missing from this excerpt. */
1931 static __inline__ __m128 __DEFAULT_FN_ATTRS
1932 _mm_mask_add_ps (__m128 __W
, __mmask16 __U
, __m128 __A
, __m128 __B
) {
1933 return (__m128
) __builtin_ia32_addps128_mask ((__v4sf
) __A
,
1939 static __inline__ __m128 __DEFAULT_FN_ATTRS
1940 _mm_maskz_add_ps (__mmask16 __U
, __m128 __A
, __m128 __B
) {
1941 return (__m128
) __builtin_ia32_addps128_mask ((__v4sf
) __A
,
1948 static __inline__ __m256 __DEFAULT_FN_ATTRS
1949 _mm256_mask_add_ps (__m256 __W
, __mmask16 __U
, __m256 __A
, __m256 __B
) {
1950 return (__m256
) __builtin_ia32_addps256_mask ((__v8sf
) __A
,
1956 static __inline__ __m256 __DEFAULT_FN_ATTRS
1957 _mm256_maskz_add_ps (__mmask16 __U
, __m256 __A
, __m256 __B
) {
1958 return (__m256
) __builtin_ia32_addps256_mask ((__v8sf
) __A
,
1961 _mm256_setzero_ps (),
/* Mask-controlled blends for epi32, pd, ps and epi64 at 128 and 256 bits.
 *
 * Each function takes (__U, __A, __W) and calls the matching vector select
 * builtin -- __builtin_ia32_select{d,pd,ps,q}_{128,256} -- with
 * (__mmask8)__U as the first operand.
 *
 * NOTE(review): the two vector operands of the select call are missing from
 * this excerpt, so which of __A / __W is chosen for set mask bits cannot be
 * determined here -- confirm against the full header. */
1965 static __inline__ __m128i __DEFAULT_FN_ATTRS
1966 _mm_mask_blend_epi32 (__mmask8 __U
, __m128i __A
, __m128i __W
) {
1967 return (__m128i
) __builtin_ia32_selectd_128 ((__mmask8
) __U
,
1972 static __inline__ __m256i __DEFAULT_FN_ATTRS
1973 _mm256_mask_blend_epi32 (__mmask8 __U
, __m256i __A
, __m256i __W
) {
1974 return (__m256i
) __builtin_ia32_selectd_256 ((__mmask8
) __U
,
1979 static __inline__ __m128d __DEFAULT_FN_ATTRS
1980 _mm_mask_blend_pd (__mmask8 __U
, __m128d __A
, __m128d __W
) {
1981 return (__m128d
) __builtin_ia32_selectpd_128 ((__mmask8
) __U
,
1986 static __inline__ __m256d __DEFAULT_FN_ATTRS
1987 _mm256_mask_blend_pd (__mmask8 __U
, __m256d __A
, __m256d __W
) {
1988 return (__m256d
) __builtin_ia32_selectpd_256 ((__mmask8
) __U
,
1993 static __inline__ __m128 __DEFAULT_FN_ATTRS
1994 _mm_mask_blend_ps (__mmask8 __U
, __m128 __A
, __m128 __W
) {
1995 return (__m128
) __builtin_ia32_selectps_128 ((__mmask8
) __U
,
2000 static __inline__ __m256 __DEFAULT_FN_ATTRS
2001 _mm256_mask_blend_ps (__mmask8 __U
, __m256 __A
, __m256 __W
) {
2002 return (__m256
) __builtin_ia32_selectps_256 ((__mmask8
) __U
,
2007 static __inline__ __m128i __DEFAULT_FN_ATTRS
2008 _mm_mask_blend_epi64 (__mmask8 __U
, __m128i __A
, __m128i __W
) {
2009 return (__m128i
) __builtin_ia32_selectq_128 ((__mmask8
) __U
,
2014 static __inline__ __m256i __DEFAULT_FN_ATTRS
2015 _mm256_mask_blend_epi64 (__mmask8 __U
, __m256i __A
, __m256i __W
) {
2016 return (__m256i
) __builtin_ia32_selectq_256 ((__mmask8
) __U
,
/* Masked compress of packed doubles, 128-bit and 256-bit forms.
 *
 * _mask_ (__W, __U, __A) and _maskz_ (__U, __A) fragments both call
 * __builtin_ia32_compressdf{128,256}_mask with __A as the first operand.
 * The 256-bit _maskz_ fragment visibly passes _mm256_setzero_pd() as a later
 * operand.
 *
 * NOTE(review): the other trailing operands are missing from this excerpt;
 * presumably the _mask_ forms pass __W in that position -- confirm. */
2021 static __inline__ __m128d __DEFAULT_FN_ATTRS
2022 _mm_mask_compress_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
2023 return (__m128d
) __builtin_ia32_compressdf128_mask ((__v2df
) __A
,
2028 static __inline__ __m128d __DEFAULT_FN_ATTRS
2029 _mm_maskz_compress_pd (__mmask8 __U
, __m128d __A
) {
2030 return (__m128d
) __builtin_ia32_compressdf128_mask ((__v2df
) __A
,
2036 static __inline__ __m256d __DEFAULT_FN_ATTRS
2037 _mm256_mask_compress_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
2038 return (__m256d
) __builtin_ia32_compressdf256_mask ((__v4df
) __A
,
2043 static __inline__ __m256d __DEFAULT_FN_ATTRS
2044 _mm256_maskz_compress_pd (__mmask8 __U
, __m256d __A
) {
2045 return (__m256d
) __builtin_ia32_compressdf256_mask ((__v4df
) __A
,
2047 _mm256_setzero_pd (),
/* Masked compress of packed 64-bit integers, 128-bit and 256-bit forms.
 *
 * _mask_ (__W, __U, __A) and _maskz_ (__U, __A) fragments both call
 * __builtin_ia32_compressdi{128,256}_mask with __A as the first operand.
 * The _maskz_ fragments visibly pass _mm_setzero_si128() /
 * _mm256_setzero_si256() as a later operand.
 *
 * NOTE(review): the other trailing operands are missing from this excerpt;
 * presumably the _mask_ forms pass __W in that position -- confirm. */
2051 static __inline__ __m128i __DEFAULT_FN_ATTRS
2052 _mm_mask_compress_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
2053 return (__m128i
) __builtin_ia32_compressdi128_mask ((__v2di
) __A
,
2058 static __inline__ __m128i __DEFAULT_FN_ATTRS
2059 _mm_maskz_compress_epi64 (__mmask8 __U
, __m128i __A
) {
2060 return (__m128i
) __builtin_ia32_compressdi128_mask ((__v2di
) __A
,
2062 _mm_setzero_si128 (),
2066 static __inline__ __m256i __DEFAULT_FN_ATTRS
2067 _mm256_mask_compress_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
2068 return (__m256i
) __builtin_ia32_compressdi256_mask ((__v4di
) __A
,
2073 static __inline__ __m256i __DEFAULT_FN_ATTRS
2074 _mm256_maskz_compress_epi64 (__mmask8 __U
, __m256i __A
) {
2075 return (__m256i
) __builtin_ia32_compressdi256_mask ((__v4di
) __A
,
2077 _mm256_setzero_si256 (),
/* Masked compress of packed floats, 128-bit and 256-bit forms.
 *
 * _mask_ (__W, __U, __A) and _maskz_ (__U, __A) fragments both call
 * __builtin_ia32_compresssf{128,256}_mask with __A as the first operand.
 * The 256-bit _maskz_ fragment visibly passes _mm256_setzero_ps() as a later
 * operand.
 *
 * NOTE(review): the other trailing operands are missing from this excerpt;
 * presumably the _mask_ forms pass __W in that position -- confirm. */
2081 static __inline__ __m128 __DEFAULT_FN_ATTRS
2082 _mm_mask_compress_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
2083 return (__m128
) __builtin_ia32_compresssf128_mask ((__v4sf
) __A
,
2088 static __inline__ __m128 __DEFAULT_FN_ATTRS
2089 _mm_maskz_compress_ps (__mmask8 __U
, __m128 __A
) {
2090 return (__m128
) __builtin_ia32_compresssf128_mask ((__v4sf
) __A
,
2096 static __inline__ __m256 __DEFAULT_FN_ATTRS
2097 _mm256_mask_compress_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
2098 return (__m256
) __builtin_ia32_compresssf256_mask ((__v8sf
) __A
,
2103 static __inline__ __m256 __DEFAULT_FN_ATTRS
2104 _mm256_maskz_compress_ps (__mmask8 __U
, __m256 __A
) {
2105 return (__m256
) __builtin_ia32_compresssf256_mask ((__v8sf
) __A
,
2107 _mm256_setzero_ps (),
/* Masked compress of packed 32-bit integers, 128-bit and 256-bit forms.
 *
 * _mask_ (__W, __U, __A) and _maskz_ (__U, __A) fragments both call
 * __builtin_ia32_compresssi{128,256}_mask with __A as the first operand.
 * The _maskz_ fragments visibly pass _mm_setzero_si128() /
 * _mm256_setzero_si256() as a later operand.
 *
 * NOTE(review): the other trailing operands are missing from this excerpt;
 * presumably the _mask_ forms pass __W in that position -- confirm. */
2111 static __inline__ __m128i __DEFAULT_FN_ATTRS
2112 _mm_mask_compress_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
2113 return (__m128i
) __builtin_ia32_compresssi128_mask ((__v4si
) __A
,
2118 static __inline__ __m128i __DEFAULT_FN_ATTRS
2119 _mm_maskz_compress_epi32 (__mmask8 __U
, __m128i __A
) {
2120 return (__m128i
) __builtin_ia32_compresssi128_mask ((__v4si
) __A
,
2122 _mm_setzero_si128 (),
2126 static __inline__ __m256i __DEFAULT_FN_ATTRS
2127 _mm256_mask_compress_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
2128 return (__m256i
) __builtin_ia32_compresssi256_mask ((__v8si
) __A
,
2133 static __inline__ __m256i __DEFAULT_FN_ATTRS
2134 _mm256_maskz_compress_epi32 (__mmask8 __U
, __m256i __A
) {
2135 return (__m256i
) __builtin_ia32_compresssi256_mask ((__v8si
) __A
,
2137 _mm256_setzero_si256 (),
/* Masked compress-and-store for pd, epi64, ps and epi32 at 128 and 256 bits.
 *
 * Each function returns void, takes (void *__P, __mmask8 __U, vector __A),
 * and calls __builtin_ia32_compressstore{df,di,sf,si}{128,256}_mask with __P
 * cast to a pointer to the element vector type as the first operand.
 *
 * NOTE(review): the remaining builtin operands (presumably __A and __U) are
 * missing from this excerpt.  The "u" in the name suggests no alignment
 * requirement on __P, but nothing visible here establishes that -- confirm
 * against the Intel documentation. */
2141 static __inline__
void __DEFAULT_FN_ATTRS
2142 _mm_mask_compressstoreu_pd (void *__P
, __mmask8 __U
, __m128d __A
) {
2143 __builtin_ia32_compressstoredf128_mask ((__v2df
*) __P
,
2148 static __inline__
void __DEFAULT_FN_ATTRS
2149 _mm256_mask_compressstoreu_pd (void *__P
, __mmask8 __U
, __m256d __A
) {
2150 __builtin_ia32_compressstoredf256_mask ((__v4df
*) __P
,
2155 static __inline__
void __DEFAULT_FN_ATTRS
2156 _mm_mask_compressstoreu_epi64 (void *__P
, __mmask8 __U
, __m128i __A
) {
2157 __builtin_ia32_compressstoredi128_mask ((__v2di
*) __P
,
2162 static __inline__
void __DEFAULT_FN_ATTRS
2163 _mm256_mask_compressstoreu_epi64 (void *__P
, __mmask8 __U
, __m256i __A
) {
2164 __builtin_ia32_compressstoredi256_mask ((__v4di
*) __P
,
2169 static __inline__
void __DEFAULT_FN_ATTRS
2170 _mm_mask_compressstoreu_ps (void *__P
, __mmask8 __U
, __m128 __A
) {
2171 __builtin_ia32_compressstoresf128_mask ((__v4sf
*) __P
,
2176 static __inline__
void __DEFAULT_FN_ATTRS
2177 _mm256_mask_compressstoreu_ps (void *__P
, __mmask8 __U
, __m256 __A
) {
2178 __builtin_ia32_compressstoresf256_mask ((__v8sf
*) __P
,
2183 static __inline__
void __DEFAULT_FN_ATTRS
2184 _mm_mask_compressstoreu_epi32 (void *__P
, __mmask8 __U
, __m128i __A
) {
2185 __builtin_ia32_compressstoresi128_mask ((__v4si
*) __P
,
2190 static __inline__
void __DEFAULT_FN_ATTRS
2191 _mm256_mask_compressstoreu_epi32 (void *__P
, __mmask8 __U
, __m256i __A
) {
2192 __builtin_ia32_compressstoresi256_mask ((__v8si
*) __P
,
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
2197 static __inline__ __m128d __DEFAULT_FN_ATTRS
2198 _mm_mask_cvtepi32_pd (__m128d __W
, __mmask8 __U
, __m128i __A
) {
2199 return (__m128d
) __builtin_ia32_cvtdq2pd128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si; the remaining operands (incl. __U) are cut off in this listing. */
2204 static __inline__ __m128d __DEFAULT_FN_ATTRS
2205 _mm_maskz_cvtepi32_pd (__mmask8 __U
, __m128i __A
) {
2206 return (__m128d
) __builtin_ia32_cvtdq2pd128_mask ((__v4si
) __A
,
/* Thin wrapper: the 128-bit __A goes to the builtin as __v4si and the result is returned as __m256d; how __W and __U are passed is cut off in this listing. */
2212 static __inline__ __m256d __DEFAULT_FN_ATTRS
2213 _mm256_mask_cvtepi32_pd (__m256d __W
, __mmask8 __U
, __m128i __A
) {
2214 return (__m256d
) __builtin_ia32_cvtdq2pd256_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si, then _mm256_setzero_pd(); how __U is passed is cut off in this listing. */
2219 static __inline__ __m256d __DEFAULT_FN_ATTRS
2220 _mm256_maskz_cvtepi32_pd (__mmask8 __U
, __m128i __A
) {
2221 return (__m256d
) __builtin_ia32_cvtdq2pd256_mask ((__v4si
) __A
,
2223 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
2227 static __inline__ __m128 __DEFAULT_FN_ATTRS
2228 _mm_mask_cvtepi32_ps (__m128 __W
, __mmask8 __U
, __m128i __A
) {
2229 return (__m128
) __builtin_ia32_cvtdq2ps128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si; the remaining operands (incl. __U) are cut off in this listing.
   __U is an 8-bit mask, the same type _mm_mask_cvtepi32_ps takes for this builtin. */
2234 static __inline__ __m128 __DEFAULT_FN_ATTRS
2235 _mm_maskz_cvtepi32_ps (__mmask8 __U
, __m128i __A
) {
2236 return (__m128
) __builtin_ia32_cvtdq2ps128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si; how __W and __U are passed is cut off in this listing. */
2242 static __inline__ __m256 __DEFAULT_FN_ATTRS
2243 _mm256_mask_cvtepi32_ps (__m256 __W
, __mmask8 __U
, __m256i __A
) {
2244 return (__m256
) __builtin_ia32_cvtdq2ps256_mask ((__v8si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si, then _mm256_setzero_ps(); how __U is passed is cut off in this listing.
   __U is an 8-bit mask, the same type _mm256_mask_cvtepi32_ps takes for this builtin. */
2249 static __inline__ __m256 __DEFAULT_FN_ATTRS
2250 _mm256_maskz_cvtepi32_ps (__mmask8 __U
, __m256i __A
) {
2251 return (__m256
) __builtin_ia32_cvtdq2ps256_mask ((__v8si
) __A
,
2253 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
2257 static __inline__ __m128i __DEFAULT_FN_ATTRS
2258 _mm_mask_cvtpd_epi32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2259 return (__m128i
) __builtin_ia32_cvtpd2dq128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2264 static __inline__ __m128i __DEFAULT_FN_ATTRS
2265 _mm_maskz_cvtpd_epi32 (__mmask8 __U
, __m128d __A
) {
2266 return (__m128i
) __builtin_ia32_cvtpd2dq128_mask ((__v2df
) __A
,
2268 _mm_setzero_si128 (),
/* Thin wrapper: the 256-bit __A goes to the builtin as __v4df and the result is returned as __m128i; how __W and __U are passed is cut off in this listing. */
2272 static __inline__ __m128i __DEFAULT_FN_ATTRS
2273 _mm256_mask_cvtpd_epi32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2274 return (__m128i
) __builtin_ia32_cvtpd2dq256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2279 static __inline__ __m128i __DEFAULT_FN_ATTRS
2280 _mm256_maskz_cvtpd_epi32 (__mmask8 __U
, __m256d __A
) {
2281 return (__m128i
) __builtin_ia32_cvtpd2dq256_mask ((__v4df
) __A
,
2283 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
2287 static __inline__ __m128 __DEFAULT_FN_ATTRS
2288 _mm_mask_cvtpd_ps (__m128 __W
, __mmask8 __U
, __m128d __A
) {
2289 return (__m128
) __builtin_ia32_cvtpd2ps_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __U) are cut off in this listing. */
2294 static __inline__ __m128 __DEFAULT_FN_ATTRS
2295 _mm_maskz_cvtpd_ps (__mmask8 __U
, __m128d __A
) {
2296 return (__m128
) __builtin_ia32_cvtpd2ps_mask ((__v2df
) __A
,
/* Thin wrapper: the 256-bit __A goes to the builtin as __v4df and the result is returned as __m128; how __W and __U are passed is cut off in this listing. */
2302 static __inline__ __m128 __DEFAULT_FN_ATTRS
2303 _mm256_mask_cvtpd_ps (__m128 __W
, __mmask8 __U
, __m256d __A
) {
2304 return (__m128
) __builtin_ia32_cvtpd2ps256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df; the remaining operands (incl. __U) are cut off in this listing. */
2309 static __inline__ __m128 __DEFAULT_FN_ATTRS
2310 _mm256_maskz_cvtpd_ps (__mmask8 __U
, __m256d __A
) {
2311 return (__m128
) __builtin_ia32_cvtpd2ps256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
2317 static __inline__ __m128i __DEFAULT_FN_ATTRS
2318 _mm_cvtpd_epu32 (__m128d __A
) {
2319 return (__m128i
) __builtin_ia32_cvtpd2udq128_mask ((__v2df
) __A
,
2321 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
2325 static __inline__ __m128i __DEFAULT_FN_ATTRS
2326 _mm_mask_cvtpd_epu32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2327 return (__m128i
) __builtin_ia32_cvtpd2udq128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2332 static __inline__ __m128i __DEFAULT_FN_ATTRS
2333 _mm_maskz_cvtpd_epu32 (__mmask8 __U
, __m128d __A
) {
2334 return (__m128i
) __builtin_ia32_cvtpd2udq128_mask ((__v2df
) __A
,
2336 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
2340 static __inline__ __m128i __DEFAULT_FN_ATTRS
2341 _mm256_cvtpd_epu32 (__m256d __A
) {
2342 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask ((__v4df
) __A
,
2344 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4df; how __W and __U are passed is cut off in this listing. */
2348 static __inline__ __m128i __DEFAULT_FN_ATTRS
2349 _mm256_mask_cvtpd_epu32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2350 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2355 static __inline__ __m128i __DEFAULT_FN_ATTRS
2356 _mm256_maskz_cvtpd_epu32 (__mmask8 __U
, __m256d __A
) {
2357 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask ((__v4df
) __A
,
2359 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
2363 static __inline__ __m128i __DEFAULT_FN_ATTRS
2364 _mm_mask_cvtps_epi32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2365 return (__m128i
) __builtin_ia32_cvtps2dq128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2370 static __inline__ __m128i __DEFAULT_FN_ATTRS
2371 _mm_maskz_cvtps_epi32 (__mmask8 __U
, __m128 __A
) {
2372 return (__m128i
) __builtin_ia32_cvtps2dq128_mask ((__v4sf
) __A
,
2374 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v8sf; how __W and __U are passed is cut off in this listing. */
2378 static __inline__ __m256i __DEFAULT_FN_ATTRS
2379 _mm256_mask_cvtps_epi32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2380 return (__m256i
) __builtin_ia32_cvtps2dq256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2385 static __inline__ __m256i __DEFAULT_FN_ATTRS
2386 _mm256_maskz_cvtps_epi32 (__mmask8 __U
, __m256 __A
) {
2387 return (__m256i
) __builtin_ia32_cvtps2dq256_mask ((__v8sf
) __A
,
2389 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
2393 static __inline__ __m128d __DEFAULT_FN_ATTRS
2394 _mm_mask_cvtps_pd (__m128d __W
, __mmask8 __U
, __m128 __A
) {
2395 return (__m128d
) __builtin_ia32_cvtps2pd128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __U) are cut off in this listing. */
2400 static __inline__ __m128d __DEFAULT_FN_ATTRS
2401 _mm_maskz_cvtps_pd (__mmask8 __U
, __m128 __A
) {
2402 return (__m128d
) __builtin_ia32_cvtps2pd128_mask ((__v4sf
) __A
,
/* Thin wrapper: the 128-bit __A goes to the builtin as __v4sf and the result is returned as __m256d; how __W and __U are passed is cut off in this listing. */
2408 static __inline__ __m256d __DEFAULT_FN_ATTRS
2409 _mm256_mask_cvtps_pd (__m256d __W
, __mmask8 __U
, __m128 __A
) {
2410 return (__m256d
) __builtin_ia32_cvtps2pd256_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm256_setzero_pd(); how __U is passed is cut off in this listing. */
2415 static __inline__ __m256d __DEFAULT_FN_ATTRS
2416 _mm256_maskz_cvtps_pd (__mmask8 __U
, __m128 __A
) {
2417 return (__m256d
) __builtin_ia32_cvtps2pd256_mask ((__v4sf
) __A
,
2419 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
2423 static __inline__ __m128i __DEFAULT_FN_ATTRS
2424 _mm_cvtps_epu32 (__m128 __A
) {
2425 return (__m128i
) __builtin_ia32_cvtps2udq128_mask ((__v4sf
) __A
,
2427 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
2431 static __inline__ __m128i __DEFAULT_FN_ATTRS
2432 _mm_mask_cvtps_epu32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2433 return (__m128i
) __builtin_ia32_cvtps2udq128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2438 static __inline__ __m128i __DEFAULT_FN_ATTRS
2439 _mm_maskz_cvtps_epu32 (__mmask8 __U
, __m128 __A
) {
2440 return (__m128i
) __builtin_ia32_cvtps2udq128_mask ((__v4sf
) __A
,
2442 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_si256(); the tail of the call is cut off in this listing. */
2446 static __inline__ __m256i __DEFAULT_FN_ATTRS
2447 _mm256_cvtps_epu32 (__m256 __A
) {
2448 return (__m256i
) __builtin_ia32_cvtps2udq256_mask ((__v8sf
) __A
,
2450 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v8sf; how __W and __U are passed is cut off in this listing. */
2454 static __inline__ __m256i __DEFAULT_FN_ATTRS
2455 _mm256_mask_cvtps_epu32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2456 return (__m256i
) __builtin_ia32_cvtps2udq256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2461 static __inline__ __m256i __DEFAULT_FN_ATTRS
2462 _mm256_maskz_cvtps_epu32 (__mmask8 __U
, __m256 __A
) {
2463 return (__m256i
) __builtin_ia32_cvtps2udq256_mask ((__v8sf
) __A
,
2465 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
2469 static __inline__ __m128i __DEFAULT_FN_ATTRS
2470 _mm_mask_cvttpd_epi32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2471 return (__m128i
) __builtin_ia32_cvttpd2dq128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2476 static __inline__ __m128i __DEFAULT_FN_ATTRS
2477 _mm_maskz_cvttpd_epi32 (__mmask8 __U
, __m128d __A
) {
2478 return (__m128i
) __builtin_ia32_cvttpd2dq128_mask ((__v2df
) __A
,
2480 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4df; how __W and __U are passed is cut off in this listing. */
2484 static __inline__ __m128i __DEFAULT_FN_ATTRS
2485 _mm256_mask_cvttpd_epi32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2486 return (__m128i
) __builtin_ia32_cvttpd2dq256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2491 static __inline__ __m128i __DEFAULT_FN_ATTRS
2492 _mm256_maskz_cvttpd_epi32 (__mmask8 __U
, __m256d __A
) {
2493 return (__m128i
) __builtin_ia32_cvttpd2dq256_mask ((__v4df
) __A
,
2495 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v2df, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
2499 static __inline__ __m128i __DEFAULT_FN_ATTRS
2500 _mm_cvttpd_epu32 (__m128d __A
) {
2501 return (__m128i
) __builtin_ia32_cvttpd2udq128_mask ((__v2df
) __A
,
2503 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
2507 static __inline__ __m128i __DEFAULT_FN_ATTRS
2508 _mm_mask_cvttpd_epu32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2509 return (__m128i
) __builtin_ia32_cvttpd2udq128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2514 static __inline__ __m128i __DEFAULT_FN_ATTRS
2515 _mm_maskz_cvttpd_epu32 (__mmask8 __U
, __m128d __A
) {
2516 return (__m128i
) __builtin_ia32_cvttpd2udq128_mask ((__v2df
) __A
,
2518 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
2522 static __inline__ __m128i __DEFAULT_FN_ATTRS
2523 _mm256_cvttpd_epu32 (__m256d __A
) {
2524 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask ((__v4df
) __A
,
2526 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4df; how __W and __U are passed is cut off in this listing. */
2530 static __inline__ __m128i __DEFAULT_FN_ATTRS
2531 _mm256_mask_cvttpd_epu32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2532 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2537 static __inline__ __m128i __DEFAULT_FN_ATTRS
2538 _mm256_maskz_cvttpd_epu32 (__mmask8 __U
, __m256d __A
) {
2539 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask ((__v4df
) __A
,
2541 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
2545 static __inline__ __m128i __DEFAULT_FN_ATTRS
2546 _mm_mask_cvttps_epi32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2547 return (__m128i
) __builtin_ia32_cvttps2dq128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2552 static __inline__ __m128i __DEFAULT_FN_ATTRS
2553 _mm_maskz_cvttps_epi32 (__mmask8 __U
, __m128 __A
) {
2554 return (__m128i
) __builtin_ia32_cvttps2dq128_mask ((__v4sf
) __A
,
2556 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v8sf; how __W and __U are passed is cut off in this listing. */
2560 static __inline__ __m256i __DEFAULT_FN_ATTRS
2561 _mm256_mask_cvttps_epi32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2562 return (__m256i
) __builtin_ia32_cvttps2dq256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2567 static __inline__ __m256i __DEFAULT_FN_ATTRS
2568 _mm256_maskz_cvttps_epi32 (__mmask8 __U
, __m256 __A
) {
2569 return (__m256i
) __builtin_ia32_cvttps2dq256_mask ((__v8sf
) __A
,
2571 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
2575 static __inline__ __m128i __DEFAULT_FN_ATTRS
2576 _mm_cvttps_epu32 (__m128 __A
) {
2577 return (__m128i
) __builtin_ia32_cvttps2udq128_mask ((__v4sf
) __A
,
2579 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
2583 static __inline__ __m128i __DEFAULT_FN_ATTRS
2584 _mm_mask_cvttps_epu32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2585 return (__m128i
) __builtin_ia32_cvttps2udq128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2590 static __inline__ __m128i __DEFAULT_FN_ATTRS
2591 _mm_maskz_cvttps_epu32 (__mmask8 __U
, __m128 __A
) {
2592 return (__m128i
) __builtin_ia32_cvttps2udq128_mask ((__v4sf
) __A
,
2594 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_si256(); the tail of the call is cut off in this listing. */
2598 static __inline__ __m256i __DEFAULT_FN_ATTRS
2599 _mm256_cvttps_epu32 (__m256 __A
) {
2600 return (__m256i
) __builtin_ia32_cvttps2udq256_mask ((__v8sf
) __A
,
2602 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v8sf; how __W and __U are passed is cut off in this listing. */
2606 static __inline__ __m256i __DEFAULT_FN_ATTRS
2607 _mm256_mask_cvttps_epu32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2608 return (__m256i
) __builtin_ia32_cvttps2udq256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2613 static __inline__ __m256i __DEFAULT_FN_ATTRS
2614 _mm256_maskz_cvttps_epu32 (__mmask8 __U
, __m256 __A
) {
2615 return (__m256i
) __builtin_ia32_cvttps2udq256_mask ((__v8sf
) __A
,
2617 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v4si; the tail of the call is cut off in this listing. */
2621 static __inline__ __m128d __DEFAULT_FN_ATTRS
2622 _mm_cvtepu32_pd (__m128i __A
) {
2623 return (__m128d
) __builtin_ia32_cvtudq2pd128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
2629 static __inline__ __m128d __DEFAULT_FN_ATTRS
2630 _mm_mask_cvtepu32_pd (__m128d __W
, __mmask8 __U
, __m128i __A
) {
2631 return (__m128d
) __builtin_ia32_cvtudq2pd128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si; the remaining operands (incl. __U) are cut off in this listing. */
2636 static __inline__ __m128d __DEFAULT_FN_ATTRS
2637 _mm_maskz_cvtepu32_pd (__mmask8 __U
, __m128i __A
) {
2638 return (__m128d
) __builtin_ia32_cvtudq2pd128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si, then _mm256_setzero_pd(); the tail of the call is cut off in this listing. */
2644 static __inline__ __m256d __DEFAULT_FN_ATTRS
2645 _mm256_cvtepu32_pd (__m128i __A
) {
2646 return (__m256d
) __builtin_ia32_cvtudq2pd256_mask ((__v4si
) __A
,
2648 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
2652 static __inline__ __m256d __DEFAULT_FN_ATTRS
2653 _mm256_mask_cvtepu32_pd (__m256d __W
, __mmask8 __U
, __m128i __A
) {
2654 return (__m256d
) __builtin_ia32_cvtudq2pd256_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si, then _mm256_setzero_pd(); how __U is passed is cut off in this listing. */
2659 static __inline__ __m256d __DEFAULT_FN_ATTRS
2660 _mm256_maskz_cvtepu32_pd (__mmask8 __U
, __m128i __A
) {
2661 return (__m256d
) __builtin_ia32_cvtudq2pd256_mask ((__v4si
) __A
,
2663 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4si; the tail of the call is cut off in this listing. */
2667 static __inline__ __m128 __DEFAULT_FN_ATTRS
2668 _mm_cvtepu32_ps (__m128i __A
) {
2669 return (__m128
) __builtin_ia32_cvtudq2ps128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
2675 static __inline__ __m128 __DEFAULT_FN_ATTRS
2676 _mm_mask_cvtepu32_ps (__m128 __W
, __mmask8 __U
, __m128i __A
) {
2677 return (__m128
) __builtin_ia32_cvtudq2ps128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si; the remaining operands (incl. __U) are cut off in this listing. */
2682 static __inline__ __m128 __DEFAULT_FN_ATTRS
2683 _mm_maskz_cvtepu32_ps (__mmask8 __U
, __m128i __A
) {
2684 return (__m128
) __builtin_ia32_cvtudq2ps128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si, then _mm256_setzero_ps(); the tail of the call is cut off in this listing. */
2690 static __inline__ __m256 __DEFAULT_FN_ATTRS
2691 _mm256_cvtepu32_ps (__m256i __A
) {
2692 return (__m256
) __builtin_ia32_cvtudq2ps256_mask ((__v8si
) __A
,
2694 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v8si; how __W and __U are passed is cut off in this listing. */
2698 static __inline__ __m256 __DEFAULT_FN_ATTRS
2699 _mm256_mask_cvtepu32_ps (__m256 __W
, __mmask8 __U
, __m256i __A
) {
2700 return (__m256
) __builtin_ia32_cvtudq2ps256_mask ((__v8si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si, then _mm256_setzero_ps(); how __U is passed is cut off in this listing. */
2705 static __inline__ __m256 __DEFAULT_FN_ATTRS
2706 _mm256_maskz_cvtepu32_ps (__mmask8 __U
, __m256i __A
) {
2707 return (__m256
) __builtin_ia32_cvtudq2ps256_mask ((__v8si
) __A
,
2709 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __B, __W and __U are passed is cut off in this listing. */
2713 static __inline__ __m128d __DEFAULT_FN_ATTRS
2714 _mm_mask_div_pd (__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
2715 return (__m128d
) __builtin_ia32_divpd_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __B and __U) are cut off in this listing. */
2721 static __inline__ __m128d __DEFAULT_FN_ATTRS
2722 _mm_maskz_div_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
2723 return (__m128d
) __builtin_ia32_divpd_mask ((__v2df
) __A
,
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v4df; the tail of the call is cut off as well. */
2730 static __inline__ __m256d __DEFAULT_FN_ATTRS
2731 _mm256_mask_div_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
2733 return (__m256d
) __builtin_ia32_divpd256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df and _mm256_setzero_pd() appears later in the call; how __B and __U are passed is cut off in this listing. */
2739 static __inline__ __m256d __DEFAULT_FN_ATTRS
2740 _mm256_maskz_div_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
2741 return (__m256d
) __builtin_ia32_divpd256_mask ((__v4df
) __A
,
2744 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __B, __W and __U are passed is cut off in this listing. */
2748 static __inline__ __m128 __DEFAULT_FN_ATTRS
2749 _mm_mask_div_ps (__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
2750 return (__m128
) __builtin_ia32_divps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __B and __U) are cut off in this listing. */
2756 static __inline__ __m128 __DEFAULT_FN_ATTRS
2757 _mm_maskz_div_ps (__mmask8 __U
, __m128 __A
, __m128 __B
) {
2758 return (__m128
) __builtin_ia32_divps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf; how __B, __W and __U are passed is cut off in this listing. */
2765 static __inline__ __m256 __DEFAULT_FN_ATTRS
2766 _mm256_mask_div_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
2767 return (__m256
) __builtin_ia32_divps256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf and _mm256_setzero_ps() appears later in the call; how __B and __U are passed is cut off in this listing. */
2773 static __inline__ __m256 __DEFAULT_FN_ATTRS
2774 _mm256_maskz_div_ps (__mmask8 __U
, __m256 __A
, __m256 __B
) {
2775 return (__m256
) __builtin_ia32_divps256_mask ((__v8sf
) __A
,
2778 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
2782 static __inline__ __m128d __DEFAULT_FN_ATTRS
2783 _mm_mask_expand_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
2784 return (__m128d
) __builtin_ia32_expanddf128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __U) are cut off in this listing. */
2789 static __inline__ __m128d __DEFAULT_FN_ATTRS
2790 _mm_maskz_expand_pd (__mmask8 __U
, __m128d __A
) {
2791 return (__m128d
) __builtin_ia32_expanddf128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df; how __W and __U are passed is cut off in this listing. */
2797 static __inline__ __m256d __DEFAULT_FN_ATTRS
2798 _mm256_mask_expand_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
2799 return (__m256d
) __builtin_ia32_expanddf256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm256_setzero_pd(); how __U is passed is cut off in this listing. */
2804 static __inline__ __m256d __DEFAULT_FN_ATTRS
2805 _mm256_maskz_expand_pd (__mmask8 __U
, __m256d __A
) {
2806 return (__m256d
) __builtin_ia32_expanddf256_mask ((__v4df
) __A
,
2808 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v2di; how __W and __U are passed is cut off in this listing. */
2812 static __inline__ __m128i __DEFAULT_FN_ATTRS
2813 _mm_mask_expand_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
2814 return (__m128i
) __builtin_ia32_expanddi128_mask ((__v2di
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2di, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2819 static __inline__ __m128i __DEFAULT_FN_ATTRS
2820 _mm_maskz_expand_epi64 (__mmask8 __U
, __m128i __A
) {
2821 return (__m128i
) __builtin_ia32_expanddi128_mask ((__v2di
) __A
,
2823 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4di; how __W and __U are passed is cut off in this listing. */
2827 static __inline__ __m256i __DEFAULT_FN_ATTRS
2828 _mm256_mask_expand_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
2829 return (__m256i
) __builtin_ia32_expanddi256_mask ((__v4di
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4di, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2834 static __inline__ __m256i __DEFAULT_FN_ATTRS
2835 _mm256_maskz_expand_epi64 (__mmask8 __U
, __m256i __A
) {
2836 return (__m256i
) __builtin_ia32_expanddi256_mask ((__v4di
) __A
,
2838 _mm256_setzero_si256 (),
/* Thin wrapper: __P goes to the builtin as __v2df *; how __W and __U are passed is cut off in this listing. */
2842 static __inline__ __m128d __DEFAULT_FN_ATTRS
2843 _mm_mask_expandloadu_pd (__m128d __W
, __mmask8 __U
, void const *__P
) {
2844 return (__m128d
) __builtin_ia32_expandloaddf128_mask ((__v2df
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v2df *; the remaining operands (incl. __U) are cut off in this listing. */
2850 static __inline__ __m128d __DEFAULT_FN_ATTRS
2851 _mm_maskz_expandloadu_pd (__mmask8 __U
, void const *__P
) {
2852 return (__m128d
) __builtin_ia32_expandloaddf128_mask ((__v2df
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v4df *; how __W and __U are passed is cut off in this listing. */
2859 static __inline__ __m256d __DEFAULT_FN_ATTRS
2860 _mm256_mask_expandloadu_pd (__m256d __W
, __mmask8 __U
, void const *__P
) {
2861 return (__m256d
) __builtin_ia32_expandloaddf256_mask ((__v4df
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v4df *, then _mm256_setzero_pd(); how __U is passed is cut off in this listing. */
2867 static __inline__ __m256d __DEFAULT_FN_ATTRS
2868 _mm256_maskz_expandloadu_pd (__mmask8 __U
, void const *__P
) {
2869 return (__m256d
) __builtin_ia32_expandloaddf256_mask ((__v4df
*) __P
,
2871 _mm256_setzero_pd (),
/* Thin wrapper: __P goes to the builtin as __v2di *; how __W and __U are passed is cut off in this listing. */
2876 static __inline__ __m128i __DEFAULT_FN_ATTRS
2877 _mm_mask_expandloadu_epi64 (__m128i __W
, __mmask8 __U
, void const *__P
) {
2878 return (__m128i
) __builtin_ia32_expandloaddi128_mask ((__v2di
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v2di *, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2884 static __inline__ __m128i __DEFAULT_FN_ATTRS
2885 _mm_maskz_expandloadu_epi64 (__mmask8 __U
, void const *__P
) {
2886 return (__m128i
) __builtin_ia32_expandloaddi128_mask ((__v2di
*) __P
,
2888 _mm_setzero_si128 (),
/* The parameter list breaks off after __U in this listing, although the body uses __P. __P goes to the builtin as __v4di *; the tail of the call is cut off as well. */
2893 static __inline__ __m256i __DEFAULT_FN_ATTRS
2894 _mm256_mask_expandloadu_epi64 (__m256i __W
, __mmask8 __U
,
2896 return (__m256i
) __builtin_ia32_expandloaddi256_mask ((__v4di
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v4di *, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2902 static __inline__ __m256i __DEFAULT_FN_ATTRS
2903 _mm256_maskz_expandloadu_epi64 (__mmask8 __U
, void const *__P
) {
2904 return (__m256i
) __builtin_ia32_expandloaddi256_mask ((__v4di
*) __P
,
2906 _mm256_setzero_si256 (),
/* Thin wrapper: __P goes to the builtin as __v4sf *; how __W and __U are passed is cut off in this listing. */
2911 static __inline__ __m128 __DEFAULT_FN_ATTRS
2912 _mm_mask_expandloadu_ps (__m128 __W
, __mmask8 __U
, void const *__P
) {
2913 return (__m128
) __builtin_ia32_expandloadsf128_mask ((__v4sf
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v4sf *; the remaining operands (incl. __U) are cut off in this listing. */
2918 static __inline__ __m128 __DEFAULT_FN_ATTRS
2919 _mm_maskz_expandloadu_ps (__mmask8 __U
, void const *__P
) {
2920 return (__m128
) __builtin_ia32_expandloadsf128_mask ((__v4sf
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v8sf *; how __W and __U are passed is cut off in this listing. */
2927 static __inline__ __m256 __DEFAULT_FN_ATTRS
2928 _mm256_mask_expandloadu_ps (__m256 __W
, __mmask8 __U
, void const *__P
) {
2929 return (__m256
) __builtin_ia32_expandloadsf256_mask ((__v8sf
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v8sf *, then _mm256_setzero_ps(); how __U is passed is cut off in this listing. */
2934 static __inline__ __m256 __DEFAULT_FN_ATTRS
2935 _mm256_maskz_expandloadu_ps (__mmask8 __U
, void const *__P
) {
2936 return (__m256
) __builtin_ia32_expandloadsf256_mask ((__v8sf
*) __P
,
2938 _mm256_setzero_ps (),
/* Thin wrapper: __P goes to the builtin as __v4si *; how __W and __U are passed is cut off in this listing. */
2943 static __inline__ __m128i __DEFAULT_FN_ATTRS
2944 _mm_mask_expandloadu_epi32 (__m128i __W
, __mmask8 __U
, void const *__P
) {
2945 return (__m128i
) __builtin_ia32_expandloadsi128_mask ((__v4si
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v4si *, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
2951 static __inline__ __m128i __DEFAULT_FN_ATTRS
2952 _mm_maskz_expandloadu_epi32 (__mmask8 __U
, void const *__P
) {
2953 return (__m128i
) __builtin_ia32_expandloadsi128_mask ((__v4si
*) __P
,
2955 _mm_setzero_si128 (),
/* The parameter list breaks off after __U in this listing, although the body uses __P. __P goes to the builtin as __v8si *; the tail of the call is cut off as well. */
2959 static __inline__ __m256i __DEFAULT_FN_ATTRS
2960 _mm256_mask_expandloadu_epi32 (__m256i __W
, __mmask8 __U
,
2962 return (__m256i
) __builtin_ia32_expandloadsi256_mask ((__v8si
*) __P
,
/* Thin wrapper: __P goes to the builtin as __v8si *, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
2968 static __inline__ __m256i __DEFAULT_FN_ATTRS
2969 _mm256_maskz_expandloadu_epi32 (__mmask8 __U
, void const *__P
) {
2970 return (__m256i
) __builtin_ia32_expandloadsi256_mask ((__v8si
*) __P
,
2972 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
2977 static __inline__ __m128 __DEFAULT_FN_ATTRS
2978 _mm_mask_expand_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
2979 return (__m128
) __builtin_ia32_expandsf128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __U) are cut off in this listing. */
2984 static __inline__ __m128 __DEFAULT_FN_ATTRS
2985 _mm_maskz_expand_ps (__mmask8 __U
, __m128 __A
) {
2986 return (__m128
) __builtin_ia32_expandsf128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf; how __W and __U are passed is cut off in this listing. */
2992 static __inline__ __m256 __DEFAULT_FN_ATTRS
2993 _mm256_mask_expand_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
2994 return (__m256
) __builtin_ia32_expandsf256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_ps(); how __U is passed is cut off in this listing. */
2999 static __inline__ __m256 __DEFAULT_FN_ATTRS
3000 _mm256_maskz_expand_ps (__mmask8 __U
, __m256 __A
) {
3001 return (__m256
) __builtin_ia32_expandsf256_mask ((__v8sf
) __A
,
3003 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
3007 static __inline__ __m128i __DEFAULT_FN_ATTRS
3008 _mm_mask_expand_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
3009 return (__m128i
) __builtin_ia32_expandsi128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
3014 static __inline__ __m128i __DEFAULT_FN_ATTRS
3015 _mm_maskz_expand_epi32 (__mmask8 __U
, __m128i __A
) {
3016 return (__m128i
) __builtin_ia32_expandsi128_mask ((__v4si
) __A
,
3018 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v8si; how __W and __U are passed is cut off in this listing. */
3022 static __inline__ __m256i __DEFAULT_FN_ATTRS
3023 _mm256_mask_expand_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
3024 return (__m256i
) __builtin_ia32_expandsi256_mask ((__v8si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
3029 static __inline__ __m256i __DEFAULT_FN_ATTRS
3030 _mm256_maskz_expand_epi32 (__mmask8 __U
, __m256i __A
) {
3031 return (__m256i
) __builtin_ia32_expandsi256_mask ((__v8si
) __A
,
3033 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v2df; the tail of the call is cut off in this listing. */
3037 static __inline__ __m128d __DEFAULT_FN_ATTRS
3038 _mm_getexp_pd (__m128d __A
) {
3039 return (__m128d
) __builtin_ia32_getexppd128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; how __W and __U are passed is cut off in this listing. */
3045 static __inline__ __m128d __DEFAULT_FN_ATTRS
3046 _mm_mask_getexp_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
3047 return (__m128d
) __builtin_ia32_getexppd128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __U) are cut off in this listing. */
3052 static __inline__ __m128d __DEFAULT_FN_ATTRS
3053 _mm_maskz_getexp_pd (__mmask8 __U
, __m128d __A
) {
3054 return (__m128d
) __builtin_ia32_getexppd128_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm256_setzero_pd(); the tail of the call is cut off in this listing. */
3060 static __inline__ __m256d __DEFAULT_FN_ATTRS
3061 _mm256_getexp_pd (__m256d __A
) {
3062 return (__m256d
) __builtin_ia32_getexppd256_mask ((__v4df
) __A
,
3064 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4df; how __W and __U are passed is cut off in this listing. */
3068 static __inline__ __m256d __DEFAULT_FN_ATTRS
3069 _mm256_mask_getexp_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
3070 return (__m256d
) __builtin_ia32_getexppd256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df, then _mm256_setzero_pd(); how __U is passed is cut off in this listing. */
3075 static __inline__ __m256d __DEFAULT_FN_ATTRS
3076 _mm256_maskz_getexp_pd (__mmask8 __U
, __m256d __A
) {
3077 return (__m256d
) __builtin_ia32_getexppd256_mask ((__v4df
) __A
,
3079 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4sf; the tail of the call is cut off in this listing. */
3083 static __inline__ __m128 __DEFAULT_FN_ATTRS
3084 _mm_getexp_ps (__m128 __A
) {
3085 return (__m128
) __builtin_ia32_getexpps128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; how __W and __U are passed is cut off in this listing. */
3091 static __inline__ __m128 __DEFAULT_FN_ATTRS
3092 _mm_mask_getexp_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
3093 return (__m128
) __builtin_ia32_getexpps128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __U) are cut off in this listing. */
3098 static __inline__ __m128 __DEFAULT_FN_ATTRS
3099 _mm_maskz_getexp_ps (__mmask8 __U
, __m128 __A
) {
3100 return (__m128
) __builtin_ia32_getexpps128_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_ps(); the tail of the call is cut off in this listing. */
3106 static __inline__ __m256 __DEFAULT_FN_ATTRS
3107 _mm256_getexp_ps (__m256 __A
) {
3108 return (__m256
) __builtin_ia32_getexpps256_mask ((__v8sf
) __A
,
3110 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v8sf; how __W and __U are passed is cut off in this listing. */
3114 static __inline__ __m256 __DEFAULT_FN_ATTRS
3115 _mm256_mask_getexp_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
3116 return (__m256
) __builtin_ia32_getexpps256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf, then _mm256_setzero_ps(); how __U is passed is cut off in this listing. */
3121 static __inline__ __m256 __DEFAULT_FN_ATTRS
3122 _mm256_maskz_getexp_ps (__mmask8 __U
, __m256 __A
) {
3123 return (__m256
) __builtin_ia32_getexpps256_mask ((__v8sf
) __A
,
3125 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __B, __W and __U are passed is cut off in this listing. */
3129 static __inline__ __m128d __DEFAULT_FN_ATTRS
3130 _mm_mask_max_pd (__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
3131 return (__m128d
) __builtin_ia32_maxpd_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __B and __U) are cut off in this listing. */
3137 static __inline__ __m128d __DEFAULT_FN_ATTRS
3138 _mm_maskz_max_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
3139 return (__m128d
) __builtin_ia32_maxpd_mask ((__v2df
) __A
,
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v4df; the tail of the call is cut off as well. */
3146 static __inline__ __m256d __DEFAULT_FN_ATTRS
3147 _mm256_mask_max_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3149 return (__m256d
) __builtin_ia32_maxpd256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df and _mm256_setzero_pd() appears later in the call; how __B and __U are passed is cut off in this listing. */
3155 static __inline__ __m256d __DEFAULT_FN_ATTRS
3156 _mm256_maskz_max_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
3157 return (__m256d
) __builtin_ia32_maxpd256_mask ((__v4df
) __A
,
3160 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __B, __W and __U are passed is cut off in this listing. */
3164 static __inline__ __m128 __DEFAULT_FN_ATTRS
3165 _mm_mask_max_ps (__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
3166 return (__m128
) __builtin_ia32_maxps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __B and __U) are cut off in this listing. */
3172 static __inline__ __m128 __DEFAULT_FN_ATTRS
3173 _mm_maskz_max_ps (__mmask8 __U
, __m128 __A
, __m128 __B
) {
3174 return (__m128
) __builtin_ia32_maxps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf; how __B, __W and __U are passed is cut off in this listing. */
3181 static __inline__ __m256 __DEFAULT_FN_ATTRS
3182 _mm256_mask_max_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
3183 return (__m256
) __builtin_ia32_maxps256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf and _mm256_setzero_ps() appears later in the call; how __B and __U are passed is cut off in this listing. */
3189 static __inline__ __m256 __DEFAULT_FN_ATTRS
3190 _mm256_maskz_max_ps (__mmask8 __U
, __m256 __A
, __m256 __B
) {
3191 return (__m256
) __builtin_ia32_maxps256_mask ((__v8sf
) __A
,
3194 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __B, __W and __U are passed is cut off in this listing. */
3198 static __inline__ __m128d __DEFAULT_FN_ATTRS
3199 _mm_mask_min_pd (__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
3200 return (__m128d
) __builtin_ia32_minpd_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __B and __U) are cut off in this listing. */
3206 static __inline__ __m128d __DEFAULT_FN_ATTRS
3207 _mm_maskz_min_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
3208 return (__m128d
) __builtin_ia32_minpd_mask ((__v2df
) __A
,
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v4df; the tail of the call is cut off as well. */
3215 static __inline__ __m256d __DEFAULT_FN_ATTRS
3216 _mm256_mask_min_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3218 return (__m256d
) __builtin_ia32_minpd256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df and _mm256_setzero_pd() appears later in the call; how __B and __U are passed is cut off in this listing. */
3224 static __inline__ __m256d __DEFAULT_FN_ATTRS
3225 _mm256_maskz_min_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
3226 return (__m256d
) __builtin_ia32_minpd256_mask ((__v4df
) __A
,
3229 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __B, __W and __U are passed is cut off in this listing. */
3233 static __inline__ __m128 __DEFAULT_FN_ATTRS
3234 _mm_mask_min_ps (__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
3235 return (__m128
) __builtin_ia32_minps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __B and __U) are cut off in this listing. */
3241 static __inline__ __m128 __DEFAULT_FN_ATTRS
3242 _mm_maskz_min_ps (__mmask8 __U
, __m128 __A
, __m128 __B
) {
3243 return (__m128
) __builtin_ia32_minps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf; how __B, __W and __U are passed is cut off in this listing. */
3250 static __inline__ __m256 __DEFAULT_FN_ATTRS
3251 _mm256_mask_min_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
3252 return (__m256
) __builtin_ia32_minps256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf and _mm256_setzero_ps() appears later in the call; how __B and __U are passed is cut off in this listing. */
3258 static __inline__ __m256 __DEFAULT_FN_ATTRS
3259 _mm256_maskz_min_ps (__mmask8 __U
, __m256 __A
, __m256 __B
) {
3260 return (__m256
) __builtin_ia32_minps256_mask ((__v8sf
) __A
,
3263 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v2df; how __B, __W and __U are passed is cut off in this listing. */
3267 static __inline__ __m128d __DEFAULT_FN_ATTRS
3268 _mm_mask_mul_pd (__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
3269 return (__m128d
) __builtin_ia32_mulpd_mask ((__v2df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2df; the remaining operands (incl. __B and __U) are cut off in this listing. */
3275 static __inline__ __m128d __DEFAULT_FN_ATTRS
3276 _mm_maskz_mul_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
3277 return (__m128d
) __builtin_ia32_mulpd_mask ((__v2df
) __A
,
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v4df; the tail of the call is cut off as well. */
3284 static __inline__ __m256d __DEFAULT_FN_ATTRS
3285 _mm256_mask_mul_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3287 return (__m256d
) __builtin_ia32_mulpd256_mask ((__v4df
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4df and _mm256_setzero_pd() appears later in the call; how __B and __U are passed is cut off in this listing. */
3293 static __inline__ __m256d __DEFAULT_FN_ATTRS
3294 _mm256_maskz_mul_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
3295 return (__m256d
) __builtin_ia32_mulpd256_mask ((__v4df
) __A
,
3298 _mm256_setzero_pd (),
/* Thin wrapper: __A goes to the builtin as __v4sf; how __B, __W and __U are passed is cut off in this listing. */
3302 static __inline__ __m128 __DEFAULT_FN_ATTRS
3303 _mm_mask_mul_ps (__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
3304 return (__m128
) __builtin_ia32_mulps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4sf; the remaining operands (incl. __B and __U) are cut off in this listing. */
3310 static __inline__ __m128 __DEFAULT_FN_ATTRS
3311 _mm_maskz_mul_ps (__mmask8 __U
, __m128 __A
, __m128 __B
) {
3312 return (__m128
) __builtin_ia32_mulps_mask ((__v4sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf; how __B, __W and __U are passed is cut off in this listing. */
3319 static __inline__ __m256 __DEFAULT_FN_ATTRS
3320 _mm256_mask_mul_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
3321 return (__m256
) __builtin_ia32_mulps256_mask ((__v8sf
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8sf and _mm256_setzero_ps() appears later in the call; how __B and __U are passed is cut off in this listing. */
3327 static __inline__ __m256 __DEFAULT_FN_ATTRS
3328 _mm256_maskz_mul_ps (__mmask8 __U
, __m256 __A
, __m256 __B
) {
3329 return (__m256
) __builtin_ia32_mulps256_mask ((__v8sf
) __A
,
3332 _mm256_setzero_ps (),
/* Thin wrapper: __A goes to the builtin as __v4si; how __W and __U are passed is cut off in this listing. */
3336 static __inline__ __m128i __DEFAULT_FN_ATTRS
3337 _mm_mask_abs_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
3338 return (__m128i
) __builtin_ia32_pabsd128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4si, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
3343 static __inline__ __m128i __DEFAULT_FN_ATTRS
3344 _mm_maskz_abs_epi32 (__mmask8 __U
, __m128i __A
) {
3345 return (__m128i
) __builtin_ia32_pabsd128_mask ((__v4si
) __A
,
3347 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v8si; how __W and __U are passed is cut off in this listing. */
3351 static __inline__ __m256i __DEFAULT_FN_ATTRS
3352 _mm256_mask_abs_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
3353 return (__m256i
) __builtin_ia32_pabsd256_mask ((__v8si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
3358 static __inline__ __m256i __DEFAULT_FN_ATTRS
3359 _mm256_maskz_abs_epi32 (__mmask8 __U
, __m256i __A
) {
3360 return (__m256i
) __builtin_ia32_pabsd256_mask ((__v8si
) __A
,
3362 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v2di, then _mm_setzero_si128(); the tail of the call is cut off in this listing. */
3366 static __inline__ __m128i __DEFAULT_FN_ATTRS
3367 _mm_abs_epi64 (__m128i __A
) {
3368 return (__m128i
) __builtin_ia32_pabsq128_mask ((__v2di
) __A
,
3370 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v2di; how __W and __U are passed is cut off in this listing. */
3374 static __inline__ __m128i __DEFAULT_FN_ATTRS
3375 _mm_mask_abs_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
3376 return (__m128i
) __builtin_ia32_pabsq128_mask ((__v2di
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2di, then _mm_setzero_si128(); how __U is passed is cut off in this listing. */
3381 static __inline__ __m128i __DEFAULT_FN_ATTRS
3382 _mm_maskz_abs_epi64 (__mmask8 __U
, __m128i __A
) {
3383 return (__m128i
) __builtin_ia32_pabsq128_mask ((__v2di
) __A
,
3385 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4di, then _mm256_setzero_si256(); the tail of the call is cut off in this listing. */
3389 static __inline__ __m256i __DEFAULT_FN_ATTRS
3390 _mm256_abs_epi64 (__m256i __A
) {
3391 return (__m256i
) __builtin_ia32_pabsq256_mask ((__v4di
) __A
,
3393 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v4di; how __W and __U are passed is cut off in this listing. */
3397 static __inline__ __m256i __DEFAULT_FN_ATTRS
3398 _mm256_mask_abs_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
3399 return (__m256i
) __builtin_ia32_pabsq256_mask ((__v4di
) __A
,
/* Thin wrapper: __A goes to the builtin as __v4di, then _mm256_setzero_si256(); how __U is passed is cut off in this listing. */
3404 static __inline__ __m256i __DEFAULT_FN_ATTRS
3405 _mm256_maskz_abs_epi64 (__mmask8 __U
, __m256i __A
) {
3406 return (__m256i
) __builtin_ia32_pabsq256_mask ((__v4di
) __A
,
3408 _mm256_setzero_si256 (),
/* Thin wrapper: __A goes to the builtin as __v4si and _mm_setzero_si128() appears later in the call; how __B and __M are passed is cut off in this listing. */
3412 static __inline__ __m128i __DEFAULT_FN_ATTRS
3413 _mm_maskz_max_epi32 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3414 return (__m128i
) __builtin_ia32_pmaxsd128_mask ((__v4si
) __A
,
3417 _mm_setzero_si128 (),
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v4si; the tail of the call is cut off as well. */
3421 static __inline__ __m128i __DEFAULT_FN_ATTRS
3422 _mm_mask_max_epi32 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3424 return (__m128i
) __builtin_ia32_pmaxsd128_mask ((__v4si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v8si and _mm256_setzero_si256() appears later in the call; how __B and __M are passed is cut off in this listing. */
3429 static __inline__ __m256i __DEFAULT_FN_ATTRS
3430 _mm256_maskz_max_epi32 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3431 return (__m256i
) __builtin_ia32_pmaxsd256_mask ((__v8si
) __A
,
3434 _mm256_setzero_si256 (),
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v8si; the tail of the call is cut off as well. */
3438 static __inline__ __m256i __DEFAULT_FN_ATTRS
3439 _mm256_mask_max_epi32 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3441 return (__m256i
) __builtin_ia32_pmaxsd256_mask ((__v8si
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2di and _mm_setzero_si128() appears later in the call; how __B and __M are passed is cut off in this listing. */
3446 static __inline__ __m128i __DEFAULT_FN_ATTRS
3447 _mm_maskz_max_epi64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3448 return (__m128i
) __builtin_ia32_pmaxsq128_mask ((__v2di
) __A
,
3451 _mm_setzero_si128 (),
/* The parameter list breaks off after __A in this listing. __A goes to the builtin as __v2di; the tail of the call is cut off as well. */
3455 static __inline__ __m128i __DEFAULT_FN_ATTRS
3456 _mm_mask_max_epi64 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3458 return (__m128i
) __builtin_ia32_pmaxsq128_mask ((__v2di
) __A
,
/* Thin wrapper: __A goes to the builtin as __v2di and _mm_setzero_si128() appears later in the call; how __B is passed and the tail of the call are cut off in this listing. */
3463 static __inline__ __m128i __DEFAULT_FN_ATTRS
3464 _mm_max_epi64 (__m128i __A
, __m128i __B
) {
3465 return (__m128i
) __builtin_ia32_pmaxsq128_mask ((__v2di
) __A
,
3468 _mm_setzero_si128 (),
/* Thin wrapper: __A goes to the builtin as __v4di and _mm256_setzero_si256() appears later in the call; how __B and __M are passed is cut off in this listing. */
3472 static __inline__ __m256i __DEFAULT_FN_ATTRS
3473 _mm256_maskz_max_epi64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3474 return (__m256i
) __builtin_ia32_pmaxsq256_mask ((__v4di
) __A
,
3477 _mm256_setzero_si256 (),
3481 static __inline__ __m256i __DEFAULT_FN_ATTRS
3482 _mm256_mask_max_epi64 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3484 return (__m256i
) __builtin_ia32_pmaxsq256_mask ((__v4di
) __A
,
3489 static __inline__ __m256i __DEFAULT_FN_ATTRS
3490 _mm256_max_epi64 (__m256i __A
, __m256i __B
) {
3491 return (__m256i
) __builtin_ia32_pmaxsq256_mask ((__v4di
) __A
,
3494 _mm256_setzero_si256 (),
3498 static __inline__ __m128i __DEFAULT_FN_ATTRS
3499 _mm_maskz_max_epu32 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3500 return (__m128i
) __builtin_ia32_pmaxud128_mask ((__v4si
) __A
,
3503 _mm_setzero_si128 (),
3507 static __inline__ __m128i __DEFAULT_FN_ATTRS
3508 _mm_mask_max_epu32 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3510 return (__m128i
) __builtin_ia32_pmaxud128_mask ((__v4si
) __A
,
3515 static __inline__ __m256i __DEFAULT_FN_ATTRS
3516 _mm256_maskz_max_epu32 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3517 return (__m256i
) __builtin_ia32_pmaxud256_mask ((__v8si
) __A
,
3520 _mm256_setzero_si256 (),
3524 static __inline__ __m256i __DEFAULT_FN_ATTRS
3525 _mm256_mask_max_epu32 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3527 return (__m256i
) __builtin_ia32_pmaxud256_mask ((__v8si
) __A
,
3532 static __inline__ __m128i __DEFAULT_FN_ATTRS
3533 _mm_maskz_max_epu64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3534 return (__m128i
) __builtin_ia32_pmaxuq128_mask ((__v2di
) __A
,
3537 _mm_setzero_si128 (),
3541 static __inline__ __m128i __DEFAULT_FN_ATTRS
3542 _mm_max_epu64 (__m128i __A
, __m128i __B
) {
3543 return (__m128i
) __builtin_ia32_pmaxuq128_mask ((__v2di
) __A
,
3546 _mm_setzero_si128 (),
3550 static __inline__ __m128i __DEFAULT_FN_ATTRS
3551 _mm_mask_max_epu64 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3553 return (__m128i
) __builtin_ia32_pmaxuq128_mask ((__v2di
) __A
,
3558 static __inline__ __m256i __DEFAULT_FN_ATTRS
3559 _mm256_maskz_max_epu64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3560 return (__m256i
) __builtin_ia32_pmaxuq256_mask ((__v4di
) __A
,
3563 _mm256_setzero_si256 (),
3567 static __inline__ __m256i __DEFAULT_FN_ATTRS
3568 _mm256_max_epu64 (__m256i __A
, __m256i __B
) {
3569 return (__m256i
) __builtin_ia32_pmaxuq256_mask ((__v4di
) __A
,
3572 _mm256_setzero_si256 (),
3576 static __inline__ __m256i __DEFAULT_FN_ATTRS
3577 _mm256_mask_max_epu64 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3579 return (__m256i
) __builtin_ia32_pmaxuq256_mask ((__v4di
) __A
,
3584 static __inline__ __m128i __DEFAULT_FN_ATTRS
3585 _mm_maskz_min_epi32 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3586 return (__m128i
) __builtin_ia32_pminsd128_mask ((__v4si
) __A
,
3589 _mm_setzero_si128 (),
3593 static __inline__ __m128i __DEFAULT_FN_ATTRS
3594 _mm_mask_min_epi32 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3596 return (__m128i
) __builtin_ia32_pminsd128_mask ((__v4si
) __A
,
3601 static __inline__ __m256i __DEFAULT_FN_ATTRS
3602 _mm256_maskz_min_epi32 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3603 return (__m256i
) __builtin_ia32_pminsd256_mask ((__v8si
) __A
,
3606 _mm256_setzero_si256 (),
3610 static __inline__ __m256i __DEFAULT_FN_ATTRS
3611 _mm256_mask_min_epi32 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3613 return (__m256i
) __builtin_ia32_pminsd256_mask ((__v8si
) __A
,
3618 static __inline__ __m128i __DEFAULT_FN_ATTRS
3619 _mm_min_epi64 (__m128i __A
, __m128i __B
) {
3620 return (__m128i
) __builtin_ia32_pminsq128_mask ((__v2di
) __A
,
3623 _mm_setzero_si128 (),
3627 static __inline__ __m128i __DEFAULT_FN_ATTRS
3628 _mm_mask_min_epi64 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3630 return (__m128i
) __builtin_ia32_pminsq128_mask ((__v2di
) __A
,
3635 static __inline__ __m128i __DEFAULT_FN_ATTRS
3636 _mm_maskz_min_epi64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3637 return (__m128i
) __builtin_ia32_pminsq128_mask ((__v2di
) __A
,
3640 _mm_setzero_si128 (),
3644 static __inline__ __m256i __DEFAULT_FN_ATTRS
3645 _mm256_min_epi64 (__m256i __A
, __m256i __B
) {
3646 return (__m256i
) __builtin_ia32_pminsq256_mask ((__v4di
) __A
,
3649 _mm256_setzero_si256 (),
3653 static __inline__ __m256i __DEFAULT_FN_ATTRS
3654 _mm256_mask_min_epi64 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3656 return (__m256i
) __builtin_ia32_pminsq256_mask ((__v4di
) __A
,
3661 static __inline__ __m256i __DEFAULT_FN_ATTRS
3662 _mm256_maskz_min_epi64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3663 return (__m256i
) __builtin_ia32_pminsq256_mask ((__v4di
) __A
,
3666 _mm256_setzero_si256 (),
3670 static __inline__ __m128i __DEFAULT_FN_ATTRS
3671 _mm_maskz_min_epu32 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3672 return (__m128i
) __builtin_ia32_pminud128_mask ((__v4si
) __A
,
3675 _mm_setzero_si128 (),
3679 static __inline__ __m128i __DEFAULT_FN_ATTRS
3680 _mm_mask_min_epu32 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3682 return (__m128i
) __builtin_ia32_pminud128_mask ((__v4si
) __A
,
3687 static __inline__ __m256i __DEFAULT_FN_ATTRS
3688 _mm256_maskz_min_epu32 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3689 return (__m256i
) __builtin_ia32_pminud256_mask ((__v8si
) __A
,
3692 _mm256_setzero_si256 (),
3696 static __inline__ __m256i __DEFAULT_FN_ATTRS
3697 _mm256_mask_min_epu32 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3699 return (__m256i
) __builtin_ia32_pminud256_mask ((__v8si
) __A
,
3704 static __inline__ __m128i __DEFAULT_FN_ATTRS
3705 _mm_min_epu64 (__m128i __A
, __m128i __B
) {
3706 return (__m128i
) __builtin_ia32_pminuq128_mask ((__v2di
) __A
,
3709 _mm_setzero_si128 (),
3713 static __inline__ __m128i __DEFAULT_FN_ATTRS
3714 _mm_mask_min_epu64 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3716 return (__m128i
) __builtin_ia32_pminuq128_mask ((__v2di
) __A
,
3721 static __inline__ __m128i __DEFAULT_FN_ATTRS
3722 _mm_maskz_min_epu64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3723 return (__m128i
) __builtin_ia32_pminuq128_mask ((__v2di
) __A
,
3726 _mm_setzero_si128 (),
3730 static __inline__ __m256i __DEFAULT_FN_ATTRS
3731 _mm256_min_epu64 (__m256i __A
, __m256i __B
) {
3732 return (__m256i
) __builtin_ia32_pminuq256_mask ((__v4di
) __A
,
3735 _mm256_setzero_si256 (),
3739 static __inline__ __m256i __DEFAULT_FN_ATTRS
3740 _mm256_mask_min_epu64 (__m256i __W
, __mmask8 __M
, __m256i __A
,
3742 return (__m256i
) __builtin_ia32_pminuq256_mask ((__v4di
) __A
,
3747 static __inline__ __m256i __DEFAULT_FN_ATTRS
3748 _mm256_maskz_min_epu64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3749 return (__m256i
) __builtin_ia32_pminuq256_mask ((__v4di
) __A
,
3752 _mm256_setzero_si256 (),
3756 #define _mm_roundscale_pd(A, imm) __extension__ ({ \
3757 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3759 (__v2df)_mm_setzero_pd(), \
3763 #define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
3764 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3766 (__v2df)(__m128d)(W), \
3770 #define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
3771 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3773 (__v2df)_mm_setzero_pd(), \
3777 #define _mm256_roundscale_pd(A, imm) __extension__ ({ \
3778 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3780 (__v4df)_mm256_setzero_pd(), \
3784 #define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
3785 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3787 (__v4df)(__m256d)(W), \
3791 #define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
3792 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3794 (__v4df)_mm256_setzero_pd(), \
3797 #define _mm_roundscale_ps(A, imm) __extension__ ({ \
3798 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3799 (__v4sf)_mm_setzero_ps(), \
3803 #define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
3804 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3805 (__v4sf)(__m128)(W), \
3809 #define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
3810 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3811 (__v4sf)_mm_setzero_ps(), \
3814 #define _mm256_roundscale_ps(A, imm) __extension__ ({ \
3815 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3816 (__v8sf)_mm256_setzero_ps(), \
3819 #define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
3820 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3821 (__v8sf)(__m256)(W), \
3825 #define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
3826 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3827 (__v8sf)_mm256_setzero_ps(), \
3830 static __inline__ __m128d __DEFAULT_FN_ATTRS
3831 _mm_scalef_pd (__m128d __A
, __m128d __B
) {
3832 return (__m128d
) __builtin_ia32_scalefpd128_mask ((__v2df
) __A
,
3839 static __inline__ __m128d __DEFAULT_FN_ATTRS
3840 _mm_mask_scalef_pd (__m128d __W
, __mmask8 __U
, __m128d __A
,
3842 return (__m128d
) __builtin_ia32_scalefpd128_mask ((__v2df
) __A
,
3848 static __inline__ __m128d __DEFAULT_FN_ATTRS
3849 _mm_maskz_scalef_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
3850 return (__m128d
) __builtin_ia32_scalefpd128_mask ((__v2df
) __A
,
3857 static __inline__ __m256d __DEFAULT_FN_ATTRS
3858 _mm256_scalef_pd (__m256d __A
, __m256d __B
) {
3859 return (__m256d
) __builtin_ia32_scalefpd256_mask ((__v4df
) __A
,
3862 _mm256_setzero_pd (),
3866 static __inline__ __m256d __DEFAULT_FN_ATTRS
3867 _mm256_mask_scalef_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3869 return (__m256d
) __builtin_ia32_scalefpd256_mask ((__v4df
) __A
,
3875 static __inline__ __m256d __DEFAULT_FN_ATTRS
3876 _mm256_maskz_scalef_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
3877 return (__m256d
) __builtin_ia32_scalefpd256_mask ((__v4df
) __A
,
3880 _mm256_setzero_pd (),
3884 static __inline__ __m128 __DEFAULT_FN_ATTRS
3885 _mm_scalef_ps (__m128 __A
, __m128 __B
) {
3886 return (__m128
) __builtin_ia32_scalefps128_mask ((__v4sf
) __A
,
3893 static __inline__ __m128 __DEFAULT_FN_ATTRS
3894 _mm_mask_scalef_ps (__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
3895 return (__m128
) __builtin_ia32_scalefps128_mask ((__v4sf
) __A
,
3901 static __inline__ __m128 __DEFAULT_FN_ATTRS
3902 _mm_maskz_scalef_ps (__mmask8 __U
, __m128 __A
, __m128 __B
) {
3903 return (__m128
) __builtin_ia32_scalefps128_mask ((__v4sf
) __A
,
3910 static __inline__ __m256 __DEFAULT_FN_ATTRS
3911 _mm256_scalef_ps (__m256 __A
, __m256 __B
) {
3912 return (__m256
) __builtin_ia32_scalefps256_mask ((__v8sf
) __A
,
3915 _mm256_setzero_ps (),
3919 static __inline__ __m256 __DEFAULT_FN_ATTRS
3920 _mm256_mask_scalef_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3922 return (__m256
) __builtin_ia32_scalefps256_mask ((__v8sf
) __A
,
3928 static __inline__ __m256 __DEFAULT_FN_ATTRS
3929 _mm256_maskz_scalef_ps (__mmask8 __U
, __m256 __A
, __m256 __B
) {
3930 return (__m256
) __builtin_ia32_scalefps256_mask ((__v8sf
) __A
,
3933 _mm256_setzero_ps (),
/* Forwards to __builtin_ia32_scatterdiv2df, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to double *
 *   index - cast to __v2di (via __m128i)
 *   v1    - cast to __v2df (via __m128d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv2df with a caller-supplied mask.
 *   addr  - base pointer, cast to double *
 *   mask  - cast to __mmask8
 *   index - cast to __v2di (via __m128i)
 *   v1    - cast to __v2df (via __m128d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv2di, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to long long *
 *   index - cast to __v2di (via __m128i)
 *   v1    - cast to __v2di (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv2di with a caller-supplied mask.
 *   addr  - base pointer, cast to long long *
 *   mask  - cast to __mmask8
 *   index - cast to __v2di (via __m128i)
 *   v1    - cast to __v2di (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv4df, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to double *
 *   index - cast to __v4di (via __m256i)
 *   v1    - cast to __v4df (via __m256d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv4df with a caller-supplied mask.
 *   addr  - base pointer, cast to double *
 *   mask  - cast to __mmask8
 *   index - cast to __v4di (via __m256i)
 *   v1    - cast to __v4df (via __m256d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv4di, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to long long *
 *   index - cast to __v4di (via __m256i)
 *   v1    - cast to __v4di (via __m256i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv4di with a caller-supplied mask.
 *   addr  - base pointer, cast to long long *
 *   mask  - cast to __mmask8
 *   index - cast to __v4di (via __m256i)
 *   v1    - cast to __v4di (via __m256i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3977 #define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
3978 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
3979 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
3982 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3983 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
3984 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Forwards to __builtin_ia32_scatterdiv4si, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to int *
 *   index - cast to __v2di (via __m128i)
 *   v1    - cast to __v4si (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv4si with a caller-supplied mask.
 *   addr  - base pointer, cast to int *
 *   mask  - cast to __mmask8
 *   index - cast to __v2di (via __m128i)
 *   v1    - cast to __v4si (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3997 #define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
3998 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
3999 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
4002 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
4003 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
4004 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Forwards to __builtin_ia32_scatterdiv8si, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to int *
 *   index - cast to __v4di (via __m256i)
 *   v1    - cast to __v4si (via __m128i); note the data operand is 128-bit
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scatterdiv8si with a caller-supplied mask.
 *   addr  - base pointer, cast to int *
 *   mask  - cast to __mmask8
 *   index - cast to __v4di (via __m256i)
 *   v1    - cast to __v4si (via __m128i); note the data operand is 128-bit
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv2df, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to double *
 *   index - cast to __v4si (via __m128i)
 *   v1    - cast to __v2df (via __m128d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv2df with a caller-supplied mask.
 *   addr  - base pointer, cast to double *
 *   mask  - cast to __mmask8
 *   index - cast to __v4si (via __m128i)
 *   v1    - cast to __v2df (via __m128d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv2di, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to long long *
 *   index - cast to __v4si (via __m128i)
 *   v1    - cast to __v2di (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv2di with a caller-supplied mask.
 *   addr  - base pointer, cast to long long *
 *   mask  - cast to __mmask8
 *   index - cast to __v4si (via __m128i)
 *   v1    - cast to __v2di (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv4df, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to double *
 *   index - cast to __v4si (via __m128i); note the index operand is 128-bit
 *   v1    - cast to __v4df (via __m256d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv4df with a caller-supplied mask.
 *   addr  - base pointer, cast to double *
 *   mask  - cast to __mmask8
 *   index - cast to __v4si (via __m128i); note the index operand is 128-bit
 *   v1    - cast to __v4df (via __m256d)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv4di, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to long long *
 *   index - cast to __v4si (via __m128i); note the index operand is 128-bit
 *   v1    - cast to __v4di (via __m256i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv4di with a caller-supplied mask.
 *   addr  - base pointer, cast to long long *
 *   mask  - cast to __mmask8
 *   index - cast to __v4si (via __m128i); note the index operand is 128-bit
 *   v1    - cast to __v4di (via __m256i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
4057 #define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
4058 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
4059 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
4062 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
4063 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
4064 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Forwards to __builtin_ia32_scattersiv4si, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to int *
 *   index - cast to __v4si (via __m128i)
 *   v1    - cast to __v4si (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv4si with a caller-supplied mask.
 *   addr  - base pointer, cast to int *
 *   mask  - cast to __mmask8
 *   index - cast to __v4si (via __m128i)
 *   v1    - cast to __v4si (via __m128i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
4077 #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
4078 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
4079 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
4082 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
4083 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
4084 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Forwards to __builtin_ia32_scattersiv8si, passing (__mmask8)-1 (all bits
 * set) as the mask argument.
 *   addr  - base pointer, cast to int *
 *   index - cast to __v8si (via __m256i)
 *   v1    - cast to __v8si (via __m256i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v8si)(__m256i)(index), \
        (__v8si)(__m256i)(v1), \
        (int)(scale)); \
  })
/* Forwards to __builtin_ia32_scattersiv8si with a caller-supplied mask.
 *   addr  - base pointer, cast to int *
 *   mask  - cast to __mmask8
 *   index - cast to __v8si (via __m256i)
 *   v1    - cast to __v8si (via __m256i)
 *   scale - cast to int
 * Expands to a GNU statement expression; each argument appears exactly once. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v8si)(__m256i)(index), \
        (__v8si)(__m256i)(v1), \
        (int)(scale)); \
  })
4097 static __inline__ __m128d __DEFAULT_FN_ATTRS
4098 _mm_mask_sqrt_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
4099 return (__m128d
) __builtin_ia32_sqrtpd128_mask ((__v2df
) __A
,
4104 static __inline__ __m128d __DEFAULT_FN_ATTRS
4105 _mm_maskz_sqrt_pd (__mmask8 __U
, __m128d __A
) {
4106 return (__m128d
) __builtin_ia32_sqrtpd128_mask ((__v2df
) __A
,
4112 static __inline__ __m256d __DEFAULT_FN_ATTRS
4113 _mm256_mask_sqrt_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
4114 return (__m256d
) __builtin_ia32_sqrtpd256_mask ((__v4df
) __A
,
4119 static __inline__ __m256d __DEFAULT_FN_ATTRS
4120 _mm256_maskz_sqrt_pd (__mmask8 __U
, __m256d __A
) {
4121 return (__m256d
) __builtin_ia32_sqrtpd256_mask ((__v4df
) __A
,
4123 _mm256_setzero_pd (),
4127 static __inline__ __m128 __DEFAULT_FN_ATTRS
4128 _mm_mask_sqrt_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
4129 return (__m128
) __builtin_ia32_sqrtps128_mask ((__v4sf
) __A
,
4134 static __inline__ __m128 __DEFAULT_FN_ATTRS
4135 _mm_maskz_sqrt_ps (__mmask8 __U
, __m128 __A
) {
4136 return (__m128
) __builtin_ia32_sqrtps128_mask ((__v4sf
) __A
,
4142 static __inline__ __m256 __DEFAULT_FN_ATTRS
4143 _mm256_mask_sqrt_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
4144 return (__m256
) __builtin_ia32_sqrtps256_mask ((__v8sf
) __A
,
4149 static __inline__ __m256 __DEFAULT_FN_ATTRS
4150 _mm256_maskz_sqrt_ps (__mmask8 __U
, __m256 __A
) {
4151 return (__m256
) __builtin_ia32_sqrtps256_mask ((__v8sf
) __A
,
4153 _mm256_setzero_ps (),
4157 static __inline__ __m128d __DEFAULT_FN_ATTRS
4158 _mm_mask_sub_pd (__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
4159 return (__m128d
) __builtin_ia32_subpd128_mask ((__v2df
) __A
,
4165 static __inline__ __m128d __DEFAULT_FN_ATTRS
4166 _mm_maskz_sub_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
4167 return (__m128d
) __builtin_ia32_subpd128_mask ((__v2df
) __A
,
4174 static __inline__ __m256d __DEFAULT_FN_ATTRS
4175 _mm256_mask_sub_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
4177 return (__m256d
) __builtin_ia32_subpd256_mask ((__v4df
) __A
,
4183 static __inline__ __m256d __DEFAULT_FN_ATTRS
4184 _mm256_maskz_sub_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
4185 return (__m256d
) __builtin_ia32_subpd256_mask ((__v4df
) __A
,
4188 _mm256_setzero_pd (),
4192 static __inline__ __m128 __DEFAULT_FN_ATTRS
4193 _mm_mask_sub_ps (__m128 __W
, __mmask16 __U
, __m128 __A
, __m128 __B
) {
4194 return (__m128
) __builtin_ia32_subps128_mask ((__v4sf
) __A
,
4200 static __inline__ __m128 __DEFAULT_FN_ATTRS
4201 _mm_maskz_sub_ps (__mmask16 __U
, __m128 __A
, __m128 __B
) {
4202 return (__m128
) __builtin_ia32_subps128_mask ((__v4sf
) __A
,
4209 static __inline__ __m256 __DEFAULT_FN_ATTRS
4210 _mm256_mask_sub_ps (__m256 __W
, __mmask16 __U
, __m256 __A
, __m256 __B
) {
4211 return (__m256
) __builtin_ia32_subps256_mask ((__v8sf
) __A
,
4217 static __inline__ __m256 __DEFAULT_FN_ATTRS
4218 _mm256_maskz_sub_ps (__mmask16 __U
, __m256 __A
, __m256 __B
) {
4219 return (__m256
) __builtin_ia32_subps256_mask ((__v8sf
) __A
,
4222 _mm256_setzero_ps (),
4226 static __inline__ __m128i __DEFAULT_FN_ATTRS
4227 _mm_mask2_permutex2var_epi32 (__m128i __A
, __m128i __I
, __mmask8 __U
,
4229 return (__m128i
) __builtin_ia32_vpermi2vard128_mask ((__v4si
) __A
,
4236 static __inline__ __m256i __DEFAULT_FN_ATTRS
4237 _mm256_mask2_permutex2var_epi32 (__m256i __A
, __m256i __I
,
4238 __mmask8 __U
, __m256i __B
) {
4239 return (__m256i
) __builtin_ia32_vpermi2vard256_mask ((__v8si
) __A
,
4246 static __inline__ __m128d __DEFAULT_FN_ATTRS
4247 _mm_mask2_permutex2var_pd (__m128d __A
, __m128i __I
, __mmask8 __U
,
4249 return (__m128d
) __builtin_ia32_vpermi2varpd128_mask ((__v2df
) __A
,
4257 static __inline__ __m256d __DEFAULT_FN_ATTRS
4258 _mm256_mask2_permutex2var_pd (__m256d __A
, __m256i __I
, __mmask8 __U
,
4260 return (__m256d
) __builtin_ia32_vpermi2varpd256_mask ((__v4df
) __A
,
4268 static __inline__ __m128 __DEFAULT_FN_ATTRS
4269 _mm_mask2_permutex2var_ps (__m128 __A
, __m128i __I
, __mmask8 __U
,
4271 return (__m128
) __builtin_ia32_vpermi2varps128_mask ((__v4sf
) __A
,
4278 static __inline__ __m256 __DEFAULT_FN_ATTRS
4279 _mm256_mask2_permutex2var_ps (__m256 __A
, __m256i __I
, __mmask8 __U
,
4281 return (__m256
) __builtin_ia32_vpermi2varps256_mask ((__v8sf
) __A
,
4288 static __inline__ __m128i __DEFAULT_FN_ATTRS
4289 _mm_mask2_permutex2var_epi64 (__m128i __A
, __m128i __I
, __mmask8 __U
,
4291 return (__m128i
) __builtin_ia32_vpermi2varq128_mask ((__v2di
) __A
,
4298 static __inline__ __m256i __DEFAULT_FN_ATTRS
4299 _mm256_mask2_permutex2var_epi64 (__m256i __A
, __m256i __I
,
4300 __mmask8 __U
, __m256i __B
) {
4301 return (__m256i
) __builtin_ia32_vpermi2varq256_mask ((__v4di
) __A
,
4308 static __inline__ __m128i __DEFAULT_FN_ATTRS
4309 _mm_permutex2var_epi32 (__m128i __A
, __m128i __I
, __m128i __B
) {
4310 return (__m128i
) __builtin_ia32_vpermt2vard128_mask ((__v4si
) __I
4317 static __inline__ __m128i __DEFAULT_FN_ATTRS
4318 _mm_mask_permutex2var_epi32 (__m128i __A
, __mmask8 __U
, __m128i __I
,
4320 return (__m128i
) __builtin_ia32_vpermt2vard128_mask ((__v4si
) __I
4327 static __inline__ __m128i __DEFAULT_FN_ATTRS
4328 _mm_maskz_permutex2var_epi32 (__mmask8 __U
, __m128i __A
, __m128i __I
,
4330 return (__m128i
) __builtin_ia32_vpermt2vard128_maskz ((__v4si
) __I
4338 static __inline__ __m256i __DEFAULT_FN_ATTRS
4339 _mm256_permutex2var_epi32 (__m256i __A
, __m256i __I
, __m256i __B
) {
4340 return (__m256i
) __builtin_ia32_vpermt2vard256_mask ((__v8si
) __I
4347 static __inline__ __m256i __DEFAULT_FN_ATTRS
4348 _mm256_mask_permutex2var_epi32 (__m256i __A
, __mmask8 __U
, __m256i __I
,
4350 return (__m256i
) __builtin_ia32_vpermt2vard256_mask ((__v8si
) __I
4357 static __inline__ __m256i __DEFAULT_FN_ATTRS
4358 _mm256_maskz_permutex2var_epi32 (__mmask8 __U
, __m256i __A
,
4359 __m256i __I
, __m256i __B
) {
4360 return (__m256i
) __builtin_ia32_vpermt2vard256_maskz ((__v8si
) __I
4368 static __inline__ __m128d __DEFAULT_FN_ATTRS
4369 _mm_permutex2var_pd (__m128d __A
, __m128i __I
, __m128d __B
) {
4370 return (__m128d
) __builtin_ia32_vpermt2varpd128_mask ((__v2di
) __I
4378 static __inline__ __m128d __DEFAULT_FN_ATTRS
4379 _mm_mask_permutex2var_pd (__m128d __A
, __mmask8 __U
, __m128i __I
,
4381 return (__m128d
) __builtin_ia32_vpermt2varpd128_mask ((__v2di
) __I
4389 static __inline__ __m128d __DEFAULT_FN_ATTRS
4390 _mm_maskz_permutex2var_pd (__mmask8 __U
, __m128d __A
, __m128i __I
,
4392 return (__m128d
) __builtin_ia32_vpermt2varpd128_maskz ((__v2di
) __I
4400 static __inline__ __m256d __DEFAULT_FN_ATTRS
4401 _mm256_permutex2var_pd (__m256d __A
, __m256i __I
, __m256d __B
) {
4402 return (__m256d
) __builtin_ia32_vpermt2varpd256_mask ((__v4di
) __I
4410 static __inline__ __m256d __DEFAULT_FN_ATTRS
4411 _mm256_mask_permutex2var_pd (__m256d __A
, __mmask8 __U
, __m256i __I
,
4413 return (__m256d
) __builtin_ia32_vpermt2varpd256_mask ((__v4di
) __I
4421 static __inline__ __m256d __DEFAULT_FN_ATTRS
4422 _mm256_maskz_permutex2var_pd (__mmask8 __U
, __m256d __A
, __m256i __I
,
4424 return (__m256d
) __builtin_ia32_vpermt2varpd256_maskz ((__v4di
) __I
4432 static __inline__ __m128 __DEFAULT_FN_ATTRS
4433 _mm_permutex2var_ps (__m128 __A
, __m128i __I
, __m128 __B
) {
4434 return (__m128
) __builtin_ia32_vpermt2varps128_mask ((__v4si
) __I
4441 static __inline__ __m128 __DEFAULT_FN_ATTRS
4442 _mm_mask_permutex2var_ps (__m128 __A
, __mmask8 __U
, __m128i __I
,
4444 return (__m128
) __builtin_ia32_vpermt2varps128_mask ((__v4si
) __I
4451 static __inline__ __m128 __DEFAULT_FN_ATTRS
4452 _mm_maskz_permutex2var_ps (__mmask8 __U
, __m128 __A
, __m128i __I
,
4454 return (__m128
) __builtin_ia32_vpermt2varps128_maskz ((__v4si
) __I
4462 static __inline__ __m256 __DEFAULT_FN_ATTRS
4463 _mm256_permutex2var_ps (__m256 __A
, __m256i __I
, __m256 __B
) {
4464 return (__m256
) __builtin_ia32_vpermt2varps256_mask ((__v8si
) __I
4471 static __inline__ __m256 __DEFAULT_FN_ATTRS
4472 _mm256_mask_permutex2var_ps (__m256 __A
, __mmask8 __U
, __m256i __I
,
4474 return (__m256
) __builtin_ia32_vpermt2varps256_mask ((__v8si
) __I
4481 static __inline__ __m256 __DEFAULT_FN_ATTRS
4482 _mm256_maskz_permutex2var_ps (__mmask8 __U
, __m256 __A
, __m256i __I
,
4484 return (__m256
) __builtin_ia32_vpermt2varps256_maskz ((__v8si
) __I
4492 static __inline__ __m128i __DEFAULT_FN_ATTRS
4493 _mm_permutex2var_epi64 (__m128i __A
, __m128i __I
, __m128i __B
) {
4494 return (__m128i
) __builtin_ia32_vpermt2varq128_mask ((__v2di
) __I
4501 static __inline__ __m128i __DEFAULT_FN_ATTRS
4502 _mm_mask_permutex2var_epi64 (__m128i __A
, __mmask8 __U
, __m128i __I
,
4504 return (__m128i
) __builtin_ia32_vpermt2varq128_mask ((__v2di
) __I
4511 static __inline__ __m128i __DEFAULT_FN_ATTRS
4512 _mm_maskz_permutex2var_epi64 (__mmask8 __U
, __m128i __A
, __m128i __I
,
4514 return (__m128i
) __builtin_ia32_vpermt2varq128_maskz ((__v2di
) __I
4523 static __inline__ __m256i __DEFAULT_FN_ATTRS
4524 _mm256_permutex2var_epi64 (__m256i __A
, __m256i __I
, __m256i __B
) {
4525 return (__m256i
) __builtin_ia32_vpermt2varq256_mask ((__v4di
) __I
4532 static __inline__ __m256i __DEFAULT_FN_ATTRS
4533 _mm256_mask_permutex2var_epi64 (__m256i __A
, __mmask8 __U
, __m256i __I
,
4535 return (__m256i
) __builtin_ia32_vpermt2varq256_mask ((__v4di
) __I
4542 static __inline__ __m256i __DEFAULT_FN_ATTRS
4543 _mm256_maskz_permutex2var_epi64 (__mmask8 __U
, __m256i __A
,
4544 __m256i __I
, __m256i __B
) {
4545 return (__m256i
) __builtin_ia32_vpermt2varq256_maskz ((__v4di
) __I
4553 static __inline__ __m128i __DEFAULT_FN_ATTRS
4554 _mm_mask_cvtepi8_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4556 return (__m128i
) __builtin_ia32_pmovsxbd128_mask ((__v16qi
) __A
,
4561 static __inline__ __m128i __DEFAULT_FN_ATTRS
4562 _mm_maskz_cvtepi8_epi32 (__mmask8 __U
, __m128i __A
)
4564 return (__m128i
) __builtin_ia32_pmovsxbd128_mask ((__v16qi
) __A
,
4566 _mm_setzero_si128 (),
4570 static __inline__ __m256i __DEFAULT_FN_ATTRS
4571 _mm256_mask_cvtepi8_epi32 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4573 return (__m256i
) __builtin_ia32_pmovsxbd256_mask ((__v16qi
) __A
,
4578 static __inline__ __m256i __DEFAULT_FN_ATTRS
4579 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U
, __m128i __A
)
4581 return (__m256i
) __builtin_ia32_pmovsxbd256_mask ((__v16qi
) __A
,
4583 _mm256_setzero_si256 (),
4587 static __inline__ __m128i __DEFAULT_FN_ATTRS
4588 _mm_mask_cvtepi8_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4590 return (__m128i
) __builtin_ia32_pmovsxbq128_mask ((__v16qi
) __A
,
4595 static __inline__ __m128i __DEFAULT_FN_ATTRS
4596 _mm_maskz_cvtepi8_epi64 (__mmask8 __U
, __m128i __A
)
4598 return (__m128i
) __builtin_ia32_pmovsxbq128_mask ((__v16qi
) __A
,
4600 _mm_setzero_si128 (),
4604 static __inline__ __m256i __DEFAULT_FN_ATTRS
4605 _mm256_mask_cvtepi8_epi64 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4607 return (__m256i
) __builtin_ia32_pmovsxbq256_mask ((__v16qi
) __A
,
4612 static __inline__ __m256i __DEFAULT_FN_ATTRS
4613 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U
, __m128i __A
)
4615 return (__m256i
) __builtin_ia32_pmovsxbq256_mask ((__v16qi
) __A
,
4617 _mm256_setzero_si256 (),
4621 static __inline__ __m128i __DEFAULT_FN_ATTRS
4622 _mm_mask_cvtepi32_epi64 (__m128i __W
, __mmask8 __U
, __m128i __X
)
4624 return (__m128i
) __builtin_ia32_pmovsxdq128_mask ((__v4si
) __X
,
4629 static __inline__ __m128i __DEFAULT_FN_ATTRS
4630 _mm_maskz_cvtepi32_epi64 (__mmask8 __U
, __m128i __X
)
4632 return (__m128i
) __builtin_ia32_pmovsxdq128_mask ((__v4si
) __X
,
4634 _mm_setzero_si128 (),
4638 static __inline__ __m256i __DEFAULT_FN_ATTRS
4639 _mm256_mask_cvtepi32_epi64 (__m256i __W
, __mmask8 __U
, __m128i __X
)
4641 return (__m256i
) __builtin_ia32_pmovsxdq256_mask ((__v4si
) __X
,
4646 static __inline__ __m256i __DEFAULT_FN_ATTRS
4647 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U
, __m128i __X
)
4649 return (__m256i
) __builtin_ia32_pmovsxdq256_mask ((__v4si
) __X
,
4651 _mm256_setzero_si256 (),
4655 static __inline__ __m128i __DEFAULT_FN_ATTRS
4656 _mm_mask_cvtepi16_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4658 return (__m128i
) __builtin_ia32_pmovsxwd128_mask ((__v8hi
) __A
,
4663 static __inline__ __m128i __DEFAULT_FN_ATTRS
4664 _mm_maskz_cvtepi16_epi32 (__mmask8 __U
, __m128i __A
)
4666 return (__m128i
) __builtin_ia32_pmovsxwd128_mask ((__v8hi
) __A
,
4668 _mm_setzero_si128 (),
4672 static __inline__ __m256i __DEFAULT_FN_ATTRS
4673 _mm256_mask_cvtepi16_epi32 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4675 return (__m256i
) __builtin_ia32_pmovsxwd256_mask ((__v8hi
) __A
,
4680 static __inline__ __m256i __DEFAULT_FN_ATTRS
4681 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U
, __m128i __A
)
4683 return (__m256i
) __builtin_ia32_pmovsxwd256_mask ((__v8hi
) __A
,
4685 _mm256_setzero_si256 (),
4689 static __inline__ __m128i __DEFAULT_FN_ATTRS
4690 _mm_mask_cvtepi16_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4692 return (__m128i
) __builtin_ia32_pmovsxwq128_mask ((__v8hi
) __A
,
4697 static __inline__ __m128i __DEFAULT_FN_ATTRS
4698 _mm_maskz_cvtepi16_epi64 (__mmask8 __U
, __m128i __A
)
4700 return (__m128i
) __builtin_ia32_pmovsxwq128_mask ((__v8hi
) __A
,
4702 _mm_setzero_si128 (),
4706 static __inline__ __m256i __DEFAULT_FN_ATTRS
4707 _mm256_mask_cvtepi16_epi64 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4709 return (__m256i
) __builtin_ia32_pmovsxwq256_mask ((__v8hi
) __A
,
4714 static __inline__ __m256i __DEFAULT_FN_ATTRS
4715 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U
, __m128i __A
)
4717 return (__m256i
) __builtin_ia32_pmovsxwq256_mask ((__v8hi
) __A
,
4719 _mm256_setzero_si256 (),
4724 static __inline__ __m128i __DEFAULT_FN_ATTRS
4725 _mm_mask_cvtepu8_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4727 return (__m128i
) __builtin_ia32_pmovzxbd128_mask ((__v16qi
) __A
,
4732 static __inline__ __m128i __DEFAULT_FN_ATTRS
4733 _mm_maskz_cvtepu8_epi32 (__mmask8 __U
, __m128i __A
)
4735 return (__m128i
) __builtin_ia32_pmovzxbd128_mask ((__v16qi
) __A
,
4737 _mm_setzero_si128 (),
4741 static __inline__ __m256i __DEFAULT_FN_ATTRS
4742 _mm256_mask_cvtepu8_epi32 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4744 return (__m256i
) __builtin_ia32_pmovzxbd256_mask ((__v16qi
) __A
,
4749 static __inline__ __m256i __DEFAULT_FN_ATTRS
4750 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U
, __m128i __A
)
4752 return (__m256i
) __builtin_ia32_pmovzxbd256_mask ((__v16qi
) __A
,
4754 _mm256_setzero_si256 (),
4758 static __inline__ __m128i __DEFAULT_FN_ATTRS
4759 _mm_mask_cvtepu8_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4761 return (__m128i
) __builtin_ia32_pmovzxbq128_mask ((__v16qi
) __A
,
4766 static __inline__ __m128i __DEFAULT_FN_ATTRS
4767 _mm_maskz_cvtepu8_epi64 (__mmask8 __U
, __m128i __A
)
4769 return (__m128i
) __builtin_ia32_pmovzxbq128_mask ((__v16qi
) __A
,
4771 _mm_setzero_si128 (),
4775 static __inline__ __m256i __DEFAULT_FN_ATTRS
4776 _mm256_mask_cvtepu8_epi64 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4778 return (__m256i
) __builtin_ia32_pmovzxbq256_mask ((__v16qi
) __A
,
4783 static __inline__ __m256i __DEFAULT_FN_ATTRS
4784 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U
, __m128i __A
)
4786 return (__m256i
) __builtin_ia32_pmovzxbq256_mask ((__v16qi
) __A
,
4788 _mm256_setzero_si256 (),
4792 static __inline__ __m128i __DEFAULT_FN_ATTRS
4793 _mm_mask_cvtepu32_epi64 (__m128i __W
, __mmask8 __U
, __m128i __X
)
4795 return (__m128i
) __builtin_ia32_pmovzxdq128_mask ((__v4si
) __X
,
4800 static __inline__ __m128i __DEFAULT_FN_ATTRS
4801 _mm_maskz_cvtepu32_epi64 (__mmask8 __U
, __m128i __X
)
4803 return (__m128i
) __builtin_ia32_pmovzxdq128_mask ((__v4si
) __X
,
4805 _mm_setzero_si128 (),
4809 static __inline__ __m256i __DEFAULT_FN_ATTRS
4810 _mm256_mask_cvtepu32_epi64 (__m256i __W
, __mmask8 __U
, __m128i __X
)
4812 return (__m256i
) __builtin_ia32_pmovzxdq256_mask ((__v4si
) __X
,
4817 static __inline__ __m256i __DEFAULT_FN_ATTRS
4818 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U
, __m128i __X
)
4820 return (__m256i
) __builtin_ia32_pmovzxdq256_mask ((__v4si
) __X
,
4822 _mm256_setzero_si256 (),
4826 static __inline__ __m128i __DEFAULT_FN_ATTRS
4827 _mm_mask_cvtepu16_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4829 return (__m128i
) __builtin_ia32_pmovzxwd128_mask ((__v8hi
) __A
,
4834 static __inline__ __m128i __DEFAULT_FN_ATTRS
4835 _mm_maskz_cvtepu16_epi32 (__mmask8 __U
, __m128i __A
)
4837 return (__m128i
) __builtin_ia32_pmovzxwd128_mask ((__v8hi
) __A
,
4839 _mm_setzero_si128 (),
4843 static __inline__ __m256i __DEFAULT_FN_ATTRS
4844 _mm256_mask_cvtepu16_epi32 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4846 return (__m256i
) __builtin_ia32_pmovzxwd256_mask ((__v8hi
) __A
,
4851 static __inline__ __m256i __DEFAULT_FN_ATTRS
4852 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U
, __m128i __A
)
4854 return (__m256i
) __builtin_ia32_pmovzxwd256_mask ((__v8hi
) __A
,
4856 _mm256_setzero_si256 (),
4860 static __inline__ __m128i __DEFAULT_FN_ATTRS
4861 _mm_mask_cvtepu16_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
)
4863 return (__m128i
) __builtin_ia32_pmovzxwq128_mask ((__v8hi
) __A
,
4868 static __inline__ __m128i __DEFAULT_FN_ATTRS
4869 _mm_maskz_cvtepu16_epi64 (__mmask8 __U
, __m128i __A
)
4871 return (__m128i
) __builtin_ia32_pmovzxwq128_mask ((__v8hi
) __A
,
4873 _mm_setzero_si128 (),
4877 static __inline__ __m256i __DEFAULT_FN_ATTRS
4878 _mm256_mask_cvtepu16_epi64 (__m256i __W
, __mmask8 __U
, __m128i __A
)
4880 return (__m256i
) __builtin_ia32_pmovzxwq256_mask ((__v8hi
) __A
,
4885 static __inline__ __m256i __DEFAULT_FN_ATTRS
4886 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U
, __m128i __A
)
4888 return (__m256i
) __builtin_ia32_pmovzxwq256_mask ((__v8hi
) __A
,
4890 _mm256_setzero_si256 (),
4895 #define _mm_rol_epi32(a, b) __extension__ ({\
4896 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4897 (__v4si)_mm_setzero_si128(), \
/* _mm_mask_rol_epi32(w, u, a, b)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prold128_mask.
 * Arguments are handed over in this order: a (via __m128i to __v4si),
 * b (as int), w (via __m128i to __v4si), u (as __mmask8).  The builtin's
 * result is cast to __m128i.
 * NOTE(review): presumably a per-lane 32-bit rotate-left by b, with w
 * supplying the lanes whose bit in u is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), \
                                        (int)(b), \
                                        (__v4si)(__m128i)(w), \
                                        (__mmask8)(u)); })
4904 #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
4905 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4906 (__v4si)_mm_setzero_si128(), \
4909 #define _mm256_rol_epi32(a, b) __extension__ ({\
4910 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4911 (__v8si)_mm256_setzero_si256(), \
/* _mm256_mask_rol_epi32(w, u, a, b)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prold256_mask.
 * Arguments are handed over in this order: a (via __m256i to __v8si),
 * b (as int), w (via __m256i to __v8si), u (as __mmask8).  The builtin's
 * result is cast to __m256i.
 * NOTE(review): presumably a per-lane 32-bit rotate-left by b, with w
 * supplying the lanes whose bit in u is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), \
                                        (int)(b), \
                                        (__v8si)(__m256i)(w), \
                                        (__mmask8)(u)); })
4918 #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
4919 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4920 (__v8si)_mm256_setzero_si256(), \
4923 #define _mm_rol_epi64(a, b) __extension__ ({\
4924 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4925 (__v2di)_mm_setzero_di(), \
/* _mm_mask_rol_epi64(w, u, a, b)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prolq128_mask.
 * Arguments are handed over in this order: a (via __m128i to __v2di),
 * b (as int), w (via __m128i to __v2di), u (as __mmask8).  The builtin's
 * result is cast to __m128i.
 * NOTE(review): presumably a per-lane 64-bit rotate-left by b, with w
 * supplying the lanes whose bit in u is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), \
                                        (int)(b), \
                                        (__v2di)(__m128i)(w), \
                                        (__mmask8)(u)); })
4932 #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
4933 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4934 (__v2di)_mm_setzero_di(), \
4937 #define _mm256_rol_epi64(a, b) __extension__ ({\
4938 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4939 (__v4di)_mm256_setzero_si256(), \
/* _mm256_mask_rol_epi64(w, u, a, b)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prolq256_mask.
 * Arguments are handed over in this order: a (via __m256i to __v4di),
 * b (as int), w (via __m256i to __v4di), u (as __mmask8).  The builtin's
 * result is cast to __m256i.
 * NOTE(review): presumably a per-lane 64-bit rotate-left by b, with w
 * supplying the lanes whose bit in u is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), \
                                        (int)(b), \
                                        (__v4di)(__m256i)(w), \
                                        (__mmask8)(u)); })
4946 #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
4947 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4948 (__v4di)_mm256_setzero_si256(), \
4951 static __inline__ __m128i __DEFAULT_FN_ATTRS
4952 _mm_rolv_epi32 (__m128i __A
, __m128i __B
)
4954 return (__m128i
) __builtin_ia32_prolvd128_mask ((__v4si
) __A
,
4957 _mm_setzero_si128 (),
4961 static __inline__ __m128i __DEFAULT_FN_ATTRS
4962 _mm_mask_rolv_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
4965 return (__m128i
) __builtin_ia32_prolvd128_mask ((__v4si
) __A
,
4971 static __inline__ __m128i __DEFAULT_FN_ATTRS
4972 _mm_maskz_rolv_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
4974 return (__m128i
) __builtin_ia32_prolvd128_mask ((__v4si
) __A
,
4977 _mm_setzero_si128 (),
4981 static __inline__ __m256i __DEFAULT_FN_ATTRS
4982 _mm256_rolv_epi32 (__m256i __A
, __m256i __B
)
4984 return (__m256i
) __builtin_ia32_prolvd256_mask ((__v8si
) __A
,
4987 _mm256_setzero_si256 (),
4991 static __inline__ __m256i __DEFAULT_FN_ATTRS
4992 _mm256_mask_rolv_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
4995 return (__m256i
) __builtin_ia32_prolvd256_mask ((__v8si
) __A
,
5001 static __inline__ __m256i __DEFAULT_FN_ATTRS
5002 _mm256_maskz_rolv_epi32 (__mmask8 __U
, __m256i __A
, __m256i __B
)
5004 return (__m256i
) __builtin_ia32_prolvd256_mask ((__v8si
) __A
,
5007 _mm256_setzero_si256 (),
5011 static __inline__ __m128i __DEFAULT_FN_ATTRS
5012 _mm_rolv_epi64 (__m128i __A
, __m128i __B
)
5014 return (__m128i
) __builtin_ia32_prolvq128_mask ((__v2di
) __A
,
5021 static __inline__ __m128i __DEFAULT_FN_ATTRS
5022 _mm_mask_rolv_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5025 return (__m128i
) __builtin_ia32_prolvq128_mask ((__v2di
) __A
,
5031 static __inline__ __m128i __DEFAULT_FN_ATTRS
5032 _mm_maskz_rolv_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5034 return (__m128i
) __builtin_ia32_prolvq128_mask ((__v2di
) __A
,
5041 static __inline__ __m256i __DEFAULT_FN_ATTRS
5042 _mm256_rolv_epi64 (__m256i __A
, __m256i __B
)
5044 return (__m256i
) __builtin_ia32_prolvq256_mask ((__v4di
) __A
,
5047 _mm256_setzero_si256 (),
5051 static __inline__ __m256i __DEFAULT_FN_ATTRS
5052 _mm256_mask_rolv_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5055 return (__m256i
) __builtin_ia32_prolvq256_mask ((__v4di
) __A
,
5061 static __inline__ __m256i __DEFAULT_FN_ATTRS
5062 _mm256_maskz_rolv_epi64 (__mmask8 __U
, __m256i __A
, __m256i __B
)
5064 return (__m256i
) __builtin_ia32_prolvq256_mask ((__v4di
) __A
,
5067 _mm256_setzero_si256 (),
5071 #define _mm_ror_epi32(A, B) __extension__ ({ \
5072 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
5073 (__v4si)_mm_setzero_si128(), \
/* _mm_mask_ror_epi32(W, U, A, B)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prord128_mask.
 * Arguments are handed over in this order: A (via __m128i to __v4si),
 * B (as int), W (via __m128i to __v4si), U (as __mmask8).  The builtin's
 * result is cast to __m128i.
 * NOTE(review): presumably a per-lane 32-bit rotate-right by B, with W
 * supplying the lanes whose bit in U is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), \
                                        (int)(B), \
                                        (__v4si)(__m128i)(W), \
                                        (__mmask8)(U)); })
5080 #define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
5081 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
5082 (__v4si)_mm_setzero_si128(), \
5085 #define _mm256_ror_epi32(A, B) __extension__ ({ \
5086 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
5087 (__v8si)_mm256_setzero_si256(), \
/* _mm256_mask_ror_epi32(W, U, A, B)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prord256_mask.
 * Arguments are handed over in this order: A (via __m256i to __v8si),
 * B (as int), W (via __m256i to __v8si), U (as __mmask8).  The builtin's
 * result is cast to __m256i.
 * NOTE(review): presumably a per-lane 32-bit rotate-right by B, with W
 * supplying the lanes whose bit in U is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), \
                                        (int)(B), \
                                        (__v8si)(__m256i)(W), \
                                        (__mmask8)(U)); })
5094 #define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
5095 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
5096 (__v8si)_mm256_setzero_si256(), \
5099 #define _mm_ror_epi64(A, B) __extension__ ({ \
5100 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
5101 (__v2di)_mm_setzero_di(), \
/* _mm_mask_ror_epi64(W, U, A, B)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prorq128_mask.
 * Arguments are handed over in this order: A (via __m128i to __v2di),
 * B (as int), W (via __m128i to __v2di), U (as __mmask8).  The builtin's
 * result is cast to __m128i.
 * NOTE(review): presumably a per-lane 64-bit rotate-right by B, with W
 * supplying the lanes whose bit in U is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), \
                                        (int)(B), \
                                        (__v2di)(__m128i)(W), \
                                        (__mmask8)(U)); })
5108 #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
5109 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
5110 (__v2di)_mm_setzero_di(), \
5113 #define _mm256_ror_epi64(A, B) __extension__ ({ \
5114 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
5115 (__v4di)_mm256_setzero_si256(), \
/* _mm256_mask_ror_epi64(W, U, A, B)
 *
 * Statement-expression macro forwarding to __builtin_ia32_prorq256_mask.
 * Arguments are handed over in this order: A (via __m256i to __v4di),
 * B (as int), W (via __m256i to __v4di), U (as __mmask8).  The builtin's
 * result is cast to __m256i.
 * NOTE(review): presumably a per-lane 64-bit rotate-right by B, with W
 * supplying the lanes whose bit in U is clear -- the builtin's semantics
 * are not visible here; confirm against the Intel Intrinsics Guide.
 */
#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), \
                                        (int)(B), \
                                        (__v4di)(__m256i)(W), \
                                        (__mmask8)(U)); })
5122 #define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
5123 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
5124 (__v4di)_mm256_setzero_si256(), \
5127 static __inline__ __m128i __DEFAULT_FN_ATTRS
5128 _mm_mask_sll_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5131 return (__m128i
) __builtin_ia32_pslld128_mask ((__v4si
) __A
,
5137 static __inline__ __m128i __DEFAULT_FN_ATTRS
5138 _mm_maskz_sll_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5140 return (__m128i
) __builtin_ia32_pslld128_mask ((__v4si
) __A
,
5143 _mm_setzero_si128 (),
5147 static __inline__ __m256i __DEFAULT_FN_ATTRS
5148 _mm256_mask_sll_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5151 return (__m256i
) __builtin_ia32_pslld256_mask ((__v8si
) __A
,
5157 static __inline__ __m256i __DEFAULT_FN_ATTRS
5158 _mm256_maskz_sll_epi32 (__mmask8 __U
, __m256i __A
, __m128i __B
)
5160 return (__m256i
) __builtin_ia32_pslld256_mask ((__v8si
) __A
,
5163 _mm256_setzero_si256 (),
5167 #define _mm_mask_slli_epi32(W, U, A, B) __extension__ ({ \
5168 (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \
5169 (__v4si)(__m128i)(W), \
5172 #define _mm_maskz_slli_epi32(U, A, B) __extension__ ({ \
5173 (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \
5174 (__v4si)_mm_setzero_si128(), \
5177 #define _mm256_mask_slli_epi32(W, U, A, B) __extension__ ({ \
5178 (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \
5179 (__v8si)(__m256i)(W), \
5182 #define _mm256_maskz_slli_epi32(U, A, B) __extension__ ({ \
5183 (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \
5184 (__v8si)_mm256_setzero_si256(), \
5187 static __inline__ __m128i __DEFAULT_FN_ATTRS
5188 _mm_mask_sll_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5191 return (__m128i
) __builtin_ia32_psllq128_mask ((__v2di
) __A
,
5197 static __inline__ __m128i __DEFAULT_FN_ATTRS
5198 _mm_maskz_sll_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5200 return (__m128i
) __builtin_ia32_psllq128_mask ((__v2di
) __A
,
5207 static __inline__ __m256i __DEFAULT_FN_ATTRS
5208 _mm256_mask_sll_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5211 return (__m256i
) __builtin_ia32_psllq256_mask ((__v4di
) __A
,
5217 static __inline__ __m256i __DEFAULT_FN_ATTRS
5218 _mm256_maskz_sll_epi64 (__mmask8 __U
, __m256i __A
, __m128i __B
)
5220 return (__m256i
) __builtin_ia32_psllq256_mask ((__v4di
) __A
,
5223 _mm256_setzero_si256 (),
5227 #define _mm_mask_slli_epi64(W, U, A, B) __extension__ ({ \
5228 (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \
5229 (__v2di)(__m128i)(W), \
5232 #define _mm_maskz_slli_epi64(U, A, B) __extension__ ({ \
5233 (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \
5234 (__v2di)_mm_setzero_di(), \
5237 #define _mm256_mask_slli_epi64(W, U, A, B) __extension__ ({ \
5238 (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \
5239 (__v4di)(__m256i)(W), \
5242 #define _mm256_maskz_slli_epi64(U, A, B) __extension__ ({ \
5243 (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \
5244 (__v4di)_mm256_setzero_si256(), \
5248 static __inline__ __m128i __DEFAULT_FN_ATTRS
5249 _mm_rorv_epi32 (__m128i __A
, __m128i __B
)
5251 return (__m128i
) __builtin_ia32_prorvd128_mask ((__v4si
) __A
,
5254 _mm_setzero_si128 (),
5258 static __inline__ __m128i __DEFAULT_FN_ATTRS
5259 _mm_mask_rorv_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5262 return (__m128i
) __builtin_ia32_prorvd128_mask ((__v4si
) __A
,
5268 static __inline__ __m128i __DEFAULT_FN_ATTRS
5269 _mm_maskz_rorv_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5271 return (__m128i
) __builtin_ia32_prorvd128_mask ((__v4si
) __A
,
5274 _mm_setzero_si128 (),
5278 static __inline__ __m256i __DEFAULT_FN_ATTRS
5279 _mm256_rorv_epi32 (__m256i __A
, __m256i __B
)
5281 return (__m256i
) __builtin_ia32_prorvd256_mask ((__v8si
) __A
,
5284 _mm256_setzero_si256 (),
5288 static __inline__ __m256i __DEFAULT_FN_ATTRS
5289 _mm256_mask_rorv_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5292 return (__m256i
) __builtin_ia32_prorvd256_mask ((__v8si
) __A
,
5298 static __inline__ __m256i __DEFAULT_FN_ATTRS
5299 _mm256_maskz_rorv_epi32 (__mmask8 __U
, __m256i __A
, __m256i __B
)
5301 return (__m256i
) __builtin_ia32_prorvd256_mask ((__v8si
) __A
,
5304 _mm256_setzero_si256 (),
5308 static __inline__ __m128i __DEFAULT_FN_ATTRS
5309 _mm_rorv_epi64 (__m128i __A
, __m128i __B
)
5311 return (__m128i
) __builtin_ia32_prorvq128_mask ((__v2di
) __A
,
5318 static __inline__ __m128i __DEFAULT_FN_ATTRS
5319 _mm_mask_rorv_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5322 return (__m128i
) __builtin_ia32_prorvq128_mask ((__v2di
) __A
,
5328 static __inline__ __m128i __DEFAULT_FN_ATTRS
5329 _mm_maskz_rorv_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5331 return (__m128i
) __builtin_ia32_prorvq128_mask ((__v2di
) __A
,
5338 static __inline__ __m256i __DEFAULT_FN_ATTRS
5339 _mm256_rorv_epi64 (__m256i __A
, __m256i __B
)
5341 return (__m256i
) __builtin_ia32_prorvq256_mask ((__v4di
) __A
,
5344 _mm256_setzero_si256 (),
5348 static __inline__ __m256i __DEFAULT_FN_ATTRS
5349 _mm256_mask_rorv_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5352 return (__m256i
) __builtin_ia32_prorvq256_mask ((__v4di
) __A
,
5358 static __inline__ __m256i __DEFAULT_FN_ATTRS
5359 _mm256_maskz_rorv_epi64 (__mmask8 __U
, __m256i __A
, __m256i __B
)
5361 return (__m256i
) __builtin_ia32_prorvq256_mask ((__v4di
) __A
,
5364 _mm256_setzero_si256 (),
5368 static __inline__ __m128i __DEFAULT_FN_ATTRS
5369 _mm_mask_sllv_epi64 (__m128i __W
, __mmask8 __U
, __m128i __X
,
5372 return (__m128i
) __builtin_ia32_psllv2di_mask ((__v2di
) __X
,
5378 static __inline__ __m128i __DEFAULT_FN_ATTRS
5379 _mm_maskz_sllv_epi64 (__mmask8 __U
, __m128i __X
, __m128i __Y
)
5381 return (__m128i
) __builtin_ia32_psllv2di_mask ((__v2di
) __X
,
5388 static __inline__ __m256i __DEFAULT_FN_ATTRS
5389 _mm256_mask_sllv_epi64 (__m256i __W
, __mmask8 __U
, __m256i __X
,
5392 return (__m256i
) __builtin_ia32_psllv4di_mask ((__v4di
) __X
,
5398 static __inline__ __m256i __DEFAULT_FN_ATTRS
5399 _mm256_maskz_sllv_epi64 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5401 return (__m256i
) __builtin_ia32_psllv4di_mask ((__v4di
) __X
,
5404 _mm256_setzero_si256 (),
5408 static __inline__ __m128i __DEFAULT_FN_ATTRS
5409 _mm_mask_sllv_epi32 (__m128i __W
, __mmask8 __U
, __m128i __X
,
5412 return (__m128i
) __builtin_ia32_psllv4si_mask ((__v4si
) __X
,
5418 static __inline__ __m128i __DEFAULT_FN_ATTRS
5419 _mm_maskz_sllv_epi32 (__mmask8 __U
, __m128i __X
, __m128i __Y
)
5421 return (__m128i
) __builtin_ia32_psllv4si_mask ((__v4si
) __X
,
5424 _mm_setzero_si128 (),
5428 static __inline__ __m256i __DEFAULT_FN_ATTRS
5429 _mm256_mask_sllv_epi32 (__m256i __W
, __mmask8 __U
, __m256i __X
,
5432 return (__m256i
) __builtin_ia32_psllv8si_mask ((__v8si
) __X
,
5438 static __inline__ __m256i __DEFAULT_FN_ATTRS
5439 _mm256_maskz_sllv_epi32 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5441 return (__m256i
) __builtin_ia32_psllv8si_mask ((__v8si
) __X
,
5444 _mm256_setzero_si256 (),
5450 static __inline__ __m128i __DEFAULT_FN_ATTRS
5451 _mm_mask_srlv_epi64 (__m128i __W
, __mmask8 __U
, __m128i __X
,
5454 return (__m128i
) __builtin_ia32_psrlv2di_mask ((__v2di
) __X
,
5460 static __inline__ __m128i __DEFAULT_FN_ATTRS
5461 _mm_maskz_srlv_epi64 (__mmask8 __U
, __m128i __X
, __m128i __Y
)
5463 return (__m128i
) __builtin_ia32_psrlv2di_mask ((__v2di
) __X
,
5470 static __inline__ __m256i __DEFAULT_FN_ATTRS
5471 _mm256_mask_srlv_epi64 (__m256i __W
, __mmask8 __U
, __m256i __X
,
5474 return (__m256i
) __builtin_ia32_psrlv4di_mask ((__v4di
) __X
,
5480 static __inline__ __m256i __DEFAULT_FN_ATTRS
5481 _mm256_maskz_srlv_epi64 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5483 return (__m256i
) __builtin_ia32_psrlv4di_mask ((__v4di
) __X
,
5486 _mm256_setzero_si256 (),
5490 static __inline__ __m128i __DEFAULT_FN_ATTRS
5491 _mm_mask_srlv_epi32 (__m128i __W
, __mmask8 __U
, __m128i __X
,
5494 return (__m128i
) __builtin_ia32_psrlv4si_mask ((__v4si
) __X
,
5500 static __inline__ __m128i __DEFAULT_FN_ATTRS
5501 _mm_maskz_srlv_epi32 (__mmask8 __U
, __m128i __X
, __m128i __Y
)
5503 return (__m128i
) __builtin_ia32_psrlv4si_mask ((__v4si
) __X
,
5506 _mm_setzero_si128 (),
5510 static __inline__ __m256i __DEFAULT_FN_ATTRS
5511 _mm256_mask_srlv_epi32 (__m256i __W
, __mmask8 __U
, __m256i __X
,
5514 return (__m256i
) __builtin_ia32_psrlv8si_mask ((__v8si
) __X
,
5520 static __inline__ __m256i __DEFAULT_FN_ATTRS
5521 _mm256_maskz_srlv_epi32 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5523 return (__m256i
) __builtin_ia32_psrlv8si_mask ((__v8si
) __X
,
5526 _mm256_setzero_si256 (),
5532 static __inline__ __m128i __DEFAULT_FN_ATTRS
5533 _mm_mask_srl_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5536 return (__m128i
) __builtin_ia32_psrld128_mask ((__v4si
) __A
,
5542 static __inline__ __m128i __DEFAULT_FN_ATTRS
5543 _mm_maskz_srl_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5545 return (__m128i
) __builtin_ia32_psrld128_mask ((__v4si
) __A
,
5548 _mm_setzero_si128 (),
5552 static __inline__ __m256i __DEFAULT_FN_ATTRS
5553 _mm256_mask_srl_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5556 return (__m256i
) __builtin_ia32_psrld256_mask ((__v8si
) __A
,
5562 static __inline__ __m256i __DEFAULT_FN_ATTRS
5563 _mm256_maskz_srl_epi32 (__mmask8 __U
, __m256i __A
, __m128i __B
)
5565 return (__m256i
) __builtin_ia32_psrld256_mask ((__v8si
) __A
,
5568 _mm256_setzero_si256 (),
5572 #define _mm_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
5573 (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \
5574 (__v4si)(__m128i)(W), \
5577 #define _mm_maskz_srli_epi32(U, A, imm) __extension__ ({ \
5578 (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \
5579 (__v4si)_mm_setzero_si128(), \
5582 #define _mm256_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
5583 (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \
5584 (__v8si)(__m256i)(W), \
5587 #define _mm256_maskz_srli_epi32(U, A, imm) __extension__ ({ \
5588 (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \
5589 (__v8si)_mm256_setzero_si256(), \
5592 static __inline__ __m128i __DEFAULT_FN_ATTRS
5593 _mm_mask_srl_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
5596 return (__m128i
) __builtin_ia32_psrlq128_mask ((__v2di
) __A
,
5602 static __inline__ __m128i __DEFAULT_FN_ATTRS
5603 _mm_maskz_srl_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
5605 return (__m128i
) __builtin_ia32_psrlq128_mask ((__v2di
) __A
,
5612 static __inline__ __m256i __DEFAULT_FN_ATTRS
5613 _mm256_mask_srl_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
5616 return (__m256i
) __builtin_ia32_psrlq256_mask ((__v4di
) __A
,
5622 static __inline__ __m256i __DEFAULT_FN_ATTRS
5623 _mm256_maskz_srl_epi64 (__mmask8 __U
, __m256i __A
, __m128i __B
)
5625 return (__m256i
) __builtin_ia32_psrlq256_mask ((__v4di
) __A
,
5628 _mm256_setzero_si256 (),
5632 #define _mm_mask_srli_epi64(W, U, A, imm) __extension__ ({ \
5633 (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
5634 (__v2di)(__m128i)(W), \
5637 #define _mm_maskz_srli_epi64(U, A, imm) __extension__ ({ \
5638 (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
5639 (__v2di)_mm_setzero_si128(), \
5642 #define _mm256_mask_srli_epi64(W, U, A, imm) __extension__ ({ \
5643 (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
5644 (__v4di)(__m256i)(W), \
5647 #define _mm256_maskz_srli_epi64(U, A, imm) __extension__ ({ \
5648 (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
5649 (__v4di)_mm256_setzero_si256(), \
5652 static __inline__ __m128i __DEFAULT_FN_ATTRS
5653 _mm_mask_srav_epi32 (__m128i __W
, __mmask8 __U
, __m128i __X
,
5656 return (__m128i
) __builtin_ia32_psrav4si_mask ((__v4si
) __X
,
5662 static __inline__ __m128i __DEFAULT_FN_ATTRS
5663 _mm_maskz_srav_epi32 (__mmask8 __U
, __m128i __X
, __m128i __Y
)
5665 return (__m128i
) __builtin_ia32_psrav4si_mask ((__v4si
) __X
,
5668 _mm_setzero_si128 (),
5672 static __inline__ __m256i __DEFAULT_FN_ATTRS
5673 _mm256_mask_srav_epi32 (__m256i __W
, __mmask8 __U
, __m256i __X
,
5676 return (__m256i
) __builtin_ia32_psrav8si_mask ((__v8si
) __X
,
5682 static __inline__ __m256i __DEFAULT_FN_ATTRS
5683 _mm256_maskz_srav_epi32 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5685 return (__m256i
) __builtin_ia32_psrav8si_mask ((__v8si
) __X
,
5688 _mm256_setzero_si256 (),
5692 static __inline__ __m128i __DEFAULT_FN_ATTRS
5693 _mm_srav_epi64 (__m128i __X
, __m128i __Y
)
5695 return (__m128i
) __builtin_ia32_psravq128_mask ((__v2di
) __X
,
5702 static __inline__ __m128i __DEFAULT_FN_ATTRS
5703 _mm_mask_srav_epi64 (__m128i __W
, __mmask8 __U
, __m128i __X
,
5706 return (__m128i
) __builtin_ia32_psravq128_mask ((__v2di
) __X
,
5712 static __inline__ __m128i __DEFAULT_FN_ATTRS
5713 _mm_maskz_srav_epi64 (__mmask8 __U
, __m128i __X
, __m128i __Y
)
5715 return (__m128i
) __builtin_ia32_psravq128_mask ((__v2di
) __X
,
5722 static __inline__ __m256i __DEFAULT_FN_ATTRS
5723 _mm256_srav_epi64 (__m256i __X
, __m256i __Y
)
5725 return (__m256i
) __builtin_ia32_psravq256_mask ((__v4di
) __X
,
5728 _mm256_setzero_si256 (),
5732 static __inline__ __m256i __DEFAULT_FN_ATTRS
5733 _mm256_mask_srav_epi64 (__m256i __W
, __mmask8 __U
, __m256i __X
,
5736 return (__m256i
) __builtin_ia32_psravq256_mask ((__v4di
) __X
,
5742 static __inline__ __m256i __DEFAULT_FN_ATTRS
5743 _mm256_maskz_srav_epi64 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5745 return (__m256i
) __builtin_ia32_psravq256_mask ((__v4di
) __X
,
5748 _mm256_setzero_si256 (),
5752 static __inline__ __m128i __DEFAULT_FN_ATTRS
5753 _mm_mask_mov_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
)
5755 return (__m128i
) __builtin_ia32_selectd_128 ((__mmask8
) __U
,
5760 static __inline__ __m128i __DEFAULT_FN_ATTRS
5761 _mm_maskz_mov_epi32 (__mmask8 __U
, __m128i __A
)
5763 return (__m128i
) __builtin_ia32_selectd_128 ((__mmask8
) __U
,
5765 (__v4si
) _mm_setzero_si128 ());
5769 static __inline__ __m256i __DEFAULT_FN_ATTRS
5770 _mm256_mask_mov_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
)
5772 return (__m256i
) __builtin_ia32_selectd_256 ((__mmask8
) __U
,
5777 static __inline__ __m256i __DEFAULT_FN_ATTRS
5778 _mm256_maskz_mov_epi32 (__mmask8 __U
, __m256i __A
)
5780 return (__m256i
) __builtin_ia32_selectd_256 ((__mmask8
) __U
,
5782 (__v8si
) _mm256_setzero_si256 ());
5785 static __inline__ __m128i __DEFAULT_FN_ATTRS
5786 _mm_mask_load_epi32 (__m128i __W
, __mmask8 __U
, void const *__P
)
5788 return (__m128i
) __builtin_ia32_movdqa32load128_mask ((__v4si
*) __P
,
5794 static __inline__ __m128i __DEFAULT_FN_ATTRS
5795 _mm_maskz_load_epi32 (__mmask8 __U
, void const *__P
)
5797 return (__m128i
) __builtin_ia32_movdqa32load128_mask ((__v4si
*) __P
,
5799 _mm_setzero_si128 (),
5804 static __inline__ __m256i __DEFAULT_FN_ATTRS
5805 _mm256_mask_load_epi32 (__m256i __W
, __mmask8 __U
, void const *__P
)
5807 return (__m256i
) __builtin_ia32_movdqa32load256_mask ((__v8si
*) __P
,
5813 static __inline__ __m256i __DEFAULT_FN_ATTRS
5814 _mm256_maskz_load_epi32 (__mmask8 __U
, void const *__P
)
5816 return (__m256i
) __builtin_ia32_movdqa32load256_mask ((__v8si
*) __P
,
5818 _mm256_setzero_si256 (),
5823 static __inline__
void __DEFAULT_FN_ATTRS
5824 _mm_mask_store_epi32 (void *__P
, __mmask8 __U
, __m128i __A
)
5826 __builtin_ia32_movdqa32store128_mask ((__v4si
*) __P
,
5831 static __inline__
void __DEFAULT_FN_ATTRS
5832 _mm256_mask_store_epi32 (void *__P
, __mmask8 __U
, __m256i __A
)
5834 __builtin_ia32_movdqa32store256_mask ((__v8si
*) __P
,
5839 static __inline__ __m128i __DEFAULT_FN_ATTRS
5840 _mm_mask_mov_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
)
5842 return (__m128i
) __builtin_ia32_selectq_128 ((__mmask8
) __U
,
5847 static __inline__ __m128i __DEFAULT_FN_ATTRS
5848 _mm_maskz_mov_epi64 (__mmask8 __U
, __m128i __A
)
5850 return (__m128i
) __builtin_ia32_selectq_128 ((__mmask8
) __U
,
5852 (__v2di
) _mm_setzero_di ());
5855 static __inline__ __m256i __DEFAULT_FN_ATTRS
5856 _mm256_mask_mov_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
)
5858 return (__m256i
) __builtin_ia32_selectq_256 ((__mmask8
) __U
,
5863 static __inline__ __m256i __DEFAULT_FN_ATTRS
5864 _mm256_maskz_mov_epi64 (__mmask8 __U
, __m256i __A
)
5866 return (__m256i
) __builtin_ia32_selectq_256 ((__mmask8
) __U
,
5868 (__v4di
) _mm256_setzero_si256 ());
5871 static __inline__ __m128i __DEFAULT_FN_ATTRS
5872 _mm_mask_load_epi64 (__m128i __W
, __mmask8 __U
, void const *__P
)
5874 return (__m128i
) __builtin_ia32_movdqa64load128_mask ((__v2di
*) __P
,
5880 static __inline__ __m128i __DEFAULT_FN_ATTRS
5881 _mm_maskz_load_epi64 (__mmask8 __U
, void const *__P
)
5883 return (__m128i
) __builtin_ia32_movdqa64load128_mask ((__v2di
*) __P
,
5890 static __inline__ __m256i __DEFAULT_FN_ATTRS
5891 _mm256_mask_load_epi64 (__m256i __W
, __mmask8 __U
, void const *__P
)
5893 return (__m256i
) __builtin_ia32_movdqa64load256_mask ((__v4di
*) __P
,
5899 static __inline__ __m256i __DEFAULT_FN_ATTRS
5900 _mm256_maskz_load_epi64 (__mmask8 __U
, void const *__P
)
5902 return (__m256i
) __builtin_ia32_movdqa64load256_mask ((__v4di
*) __P
,
5904 _mm256_setzero_si256 (),
5909 static __inline__
void __DEFAULT_FN_ATTRS
5910 _mm_mask_store_epi64 (void *__P
, __mmask8 __U
, __m128i __A
)
5912 __builtin_ia32_movdqa64store128_mask ((__v2di
*) __P
,
5917 static __inline__
void __DEFAULT_FN_ATTRS
5918 _mm256_mask_store_epi64 (void *__P
, __mmask8 __U
, __m256i __A
)
5920 __builtin_ia32_movdqa64store256_mask ((__v4di
*) __P
,
5925 static __inline__ __m128d __DEFAULT_FN_ATTRS
5926 _mm_mask_movedup_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
5928 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5929 (__v2df
)_mm_movedup_pd(__A
),
5933 static __inline__ __m128d __DEFAULT_FN_ATTRS
5934 _mm_maskz_movedup_pd (__mmask8 __U
, __m128d __A
)
5936 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5937 (__v2df
)_mm_movedup_pd(__A
),
5938 (__v2df
)_mm_setzero_pd());
5941 static __inline__ __m256d __DEFAULT_FN_ATTRS
5942 _mm256_mask_movedup_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
5944 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5945 (__v4df
)_mm256_movedup_pd(__A
),
5949 static __inline__ __m256d __DEFAULT_FN_ATTRS
5950 _mm256_maskz_movedup_pd (__mmask8 __U
, __m256d __A
)
5952 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5953 (__v4df
)_mm256_movedup_pd(__A
),
5954 (__v4df
)_mm256_setzero_pd());
5958 #define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
5959 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
5960 (__v4si)(__m128i)(O), \
5963 #define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
5964 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
5965 (__v4si)_mm_setzero_si128(), \
5968 #define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
5969 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
5970 (__v8si)(__m256i)(O), \
5973 #define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
5974 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
5975 (__v8si)_mm256_setzero_si256(), \
5978 static __inline__ __m128i __DEFAULT_FN_ATTRS
5979 _mm_mask_set1_epi64 (__m128i __O
, __mmask8 __M
, long long __A
)
5981 return (__m128i
) __builtin_ia32_pbroadcastq128_gpr_mask (__A
, (__v2di
) __O
,
5985 static __inline__ __m128i __DEFAULT_FN_ATTRS
5986 _mm_maskz_set1_epi64 (__mmask8 __M
, long long __A
)
5988 return (__m128i
) __builtin_ia32_pbroadcastq128_gpr_mask (__A
,
5990 _mm_setzero_si128 (),
5994 static __inline__ __m256i __DEFAULT_FN_ATTRS
5995 _mm256_mask_set1_epi64 (__m256i __O
, __mmask8 __M
, long long __A
)
5997 return (__m256i
) __builtin_ia32_pbroadcastq256_gpr_mask (__A
, (__v4di
) __O
,
6001 static __inline__ __m256i __DEFAULT_FN_ATTRS
6002 _mm256_maskz_set1_epi64 (__mmask8 __M
, long long __A
)
6004 return (__m256i
) __builtin_ia32_pbroadcastq256_gpr_mask (__A
,
6006 _mm256_setzero_si256 (),
6010 #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
6011 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
6012 (__v2df)(__m128d)(B), \
6013 (__v2di)(__m128i)(C), (int)(imm), \
6016 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
6017 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
6018 (__v2df)(__m128d)(B), \
6019 (__v2di)(__m128i)(C), (int)(imm), \
/* Zero-masking 128-bit double-precision fixupimm.
 *
 * Forwards to __builtin_ia32_fixupimmpd128_maskz with, in order:
 *   A   as __v2df,
 *   B   as __v2df,
 *   C   as __v2di,
 *   imm as int,
 *   U   as __mmask8.
 * Implemented as a macro; NOTE(review): presumably so that imm reaches the
 * builtin as a compile-time constant -- confirm against the builtin's
 * declaration.
 */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
6028 #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
6029 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
6030 (__v4df)(__m256d)(B), \
6031 (__v4di)(__m256i)(C), (int)(imm), \
6034 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
6035 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
6036 (__v4df)(__m256d)(B), \
6037 (__v4di)(__m256i)(C), (int)(imm), \
/* Zero-masking 256-bit double-precision fixupimm.
 *
 * Forwards to __builtin_ia32_fixupimmpd256_maskz with, in order:
 *   A   as __v4df,
 *   B   as __v4df,
 *   C   as __v4di,
 *   imm as int,
 *   U   as __mmask8.
 * Implemented as a macro; NOTE(review): presumably so that imm reaches the
 * builtin as a compile-time constant -- confirm against the builtin's
 * declaration.
 */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
6046 #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
6047 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
6048 (__v4sf)(__m128)(B), \
6049 (__v4si)(__m128i)(C), (int)(imm), \
6052 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
6053 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
6054 (__v4sf)(__m128)(B), \
6055 (__v4si)(__m128i)(C), (int)(imm), \
6058 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
6059 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
6060 (__v4sf)(__m128)(B), \
6061 (__v4si)(__m128i)(C), (int)(imm), \
6064 #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
6065 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
6066 (__v8sf)(__m256)(B), \
6067 (__v8si)(__m256i)(C), (int)(imm), \
6070 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
6071 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
6072 (__v8sf)(__m256)(B), \
6073 (__v8si)(__m256i)(C), (int)(imm), \
6076 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
6077 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
6078 (__v8sf)(__m256)(B), \
6079 (__v8si)(__m256i)(C), (int)(imm), \
6082 static __inline__ __m128d __DEFAULT_FN_ATTRS
6083 _mm_mask_load_pd (__m128d __W
, __mmask8 __U
, void const *__P
)
6085 return (__m128d
) __builtin_ia32_loadapd128_mask ((__v2df
*) __P
,
6090 static __inline__ __m128d __DEFAULT_FN_ATTRS
6091 _mm_maskz_load_pd (__mmask8 __U
, void const *__P
)
6093 return (__m128d
) __builtin_ia32_loadapd128_mask ((__v2df
*) __P
,
6099 static __inline__ __m256d __DEFAULT_FN_ATTRS
6100 _mm256_mask_load_pd (__m256d __W
, __mmask8 __U
, void const *__P
)
6102 return (__m256d
) __builtin_ia32_loadapd256_mask ((__v4df
*) __P
,
6107 static __inline__ __m256d __DEFAULT_FN_ATTRS
6108 _mm256_maskz_load_pd (__mmask8 __U
, void const *__P
)
6110 return (__m256d
) __builtin_ia32_loadapd256_mask ((__v4df
*) __P
,
6112 _mm256_setzero_pd (),
6116 static __inline__ __m128 __DEFAULT_FN_ATTRS
6117 _mm_mask_load_ps (__m128 __W
, __mmask8 __U
, void const *__P
)
6119 return (__m128
) __builtin_ia32_loadaps128_mask ((__v4sf
*) __P
,
6124 static __inline__ __m128 __DEFAULT_FN_ATTRS
6125 _mm_maskz_load_ps (__mmask8 __U
, void const *__P
)
6127 return (__m128
) __builtin_ia32_loadaps128_mask ((__v4sf
*) __P
,
6133 static __inline__ __m256 __DEFAULT_FN_ATTRS
6134 _mm256_mask_load_ps (__m256 __W
, __mmask8 __U
, void const *__P
)
6136 return (__m256
) __builtin_ia32_loadaps256_mask ((__v8sf
*) __P
,
6141 static __inline__ __m256 __DEFAULT_FN_ATTRS
6142 _mm256_maskz_load_ps (__mmask8 __U
, void const *__P
)
6144 return (__m256
) __builtin_ia32_loadaps256_mask ((__v8sf
*) __P
,
6146 _mm256_setzero_ps (),
6150 static __inline__ __m128i __DEFAULT_FN_ATTRS
6151 _mm_mask_loadu_epi64 (__m128i __W
, __mmask8 __U
, void const *__P
)
6153 return (__m128i
) __builtin_ia32_loaddqudi128_mask ((__v2di
*) __P
,
6158 static __inline__ __m128i __DEFAULT_FN_ATTRS
6159 _mm_maskz_loadu_epi64 (__mmask8 __U
, void const *__P
)
6161 return (__m128i
) __builtin_ia32_loaddqudi128_mask ((__v2di
*) __P
,
6163 _mm_setzero_si128 (),
6167 static __inline__ __m256i __DEFAULT_FN_ATTRS
6168 _mm256_mask_loadu_epi64 (__m256i __W
, __mmask8 __U
, void const *__P
)
6170 return (__m256i
) __builtin_ia32_loaddqudi256_mask ((__v4di
*) __P
,
6175 static __inline__ __m256i __DEFAULT_FN_ATTRS
6176 _mm256_maskz_loadu_epi64 (__mmask8 __U
, void const *__P
)
6178 return (__m256i
) __builtin_ia32_loaddqudi256_mask ((__v4di
*) __P
,
6180 _mm256_setzero_si256 (),
6184 static __inline__ __m128i __DEFAULT_FN_ATTRS
6185 _mm_mask_loadu_epi32 (__m128i __W
, __mmask8 __U
, void const *__P
)
6187 return (__m128i
) __builtin_ia32_loaddqusi128_mask ((__v4si
*) __P
,
6192 static __inline__ __m128i __DEFAULT_FN_ATTRS
6193 _mm_maskz_loadu_epi32 (__mmask8 __U
, void const *__P
)
6195 return (__m128i
) __builtin_ia32_loaddqusi128_mask ((__v4si
*) __P
,
6197 _mm_setzero_si128 (),
6201 static __inline__ __m256i __DEFAULT_FN_ATTRS
6202 _mm256_mask_loadu_epi32 (__m256i __W
, __mmask8 __U
, void const *__P
)
6204 return (__m256i
) __builtin_ia32_loaddqusi256_mask ((__v8si
*) __P
,
6209 static __inline__ __m256i __DEFAULT_FN_ATTRS
6210 _mm256_maskz_loadu_epi32 (__mmask8 __U
, void const *__P
)
6212 return (__m256i
) __builtin_ia32_loaddqusi256_mask ((__v8si
*) __P
,
6214 _mm256_setzero_si256 (),
6218 static __inline__ __m128d __DEFAULT_FN_ATTRS
6219 _mm_mask_loadu_pd (__m128d __W
, __mmask8 __U
, void const *__P
)
6221 return (__m128d
) __builtin_ia32_loadupd128_mask ((__v2df
*) __P
,
6226 static __inline__ __m128d __DEFAULT_FN_ATTRS
6227 _mm_maskz_loadu_pd (__mmask8 __U
, void const *__P
)
6229 return (__m128d
) __builtin_ia32_loadupd128_mask ((__v2df
*) __P
,
6235 static __inline__ __m256d __DEFAULT_FN_ATTRS
6236 _mm256_mask_loadu_pd (__m256d __W
, __mmask8 __U
, void const *__P
)
6238 return (__m256d
) __builtin_ia32_loadupd256_mask ((__v4df
*) __P
,
6243 static __inline__ __m256d __DEFAULT_FN_ATTRS
6244 _mm256_maskz_loadu_pd (__mmask8 __U
, void const *__P
)
6246 return (__m256d
) __builtin_ia32_loadupd256_mask ((__v4df
*) __P
,
6248 _mm256_setzero_pd (),
6252 static __inline__ __m128 __DEFAULT_FN_ATTRS
6253 _mm_mask_loadu_ps (__m128 __W
, __mmask8 __U
, void const *__P
)
6255 return (__m128
) __builtin_ia32_loadups128_mask ((__v4sf
*) __P
,
6260 static __inline__ __m128 __DEFAULT_FN_ATTRS
6261 _mm_maskz_loadu_ps (__mmask8 __U
, void const *__P
)
6263 return (__m128
) __builtin_ia32_loadups128_mask ((__v4sf
*) __P
,
6269 static __inline__ __m256 __DEFAULT_FN_ATTRS
6270 _mm256_mask_loadu_ps (__m256 __W
, __mmask8 __U
, void const *__P
)
6272 return (__m256
) __builtin_ia32_loadups256_mask ((__v8sf
*) __P
,
6277 static __inline__ __m256 __DEFAULT_FN_ATTRS
6278 _mm256_maskz_loadu_ps (__mmask8 __U
, void const *__P
)
6280 return (__m256
) __builtin_ia32_loadups256_mask ((__v8sf
*) __P
,
6282 _mm256_setzero_ps (),
6286 static __inline__
void __DEFAULT_FN_ATTRS
6287 _mm_mask_store_pd (void *__P
, __mmask8 __U
, __m128d __A
)
6289 __builtin_ia32_storeapd128_mask ((__v2df
*) __P
,
6294 static __inline__
void __DEFAULT_FN_ATTRS
6295 _mm256_mask_store_pd (void *__P
, __mmask8 __U
, __m256d __A
)
6297 __builtin_ia32_storeapd256_mask ((__v4df
*) __P
,
6302 static __inline__
void __DEFAULT_FN_ATTRS
6303 _mm_mask_store_ps (void *__P
, __mmask8 __U
, __m128 __A
)
6305 __builtin_ia32_storeaps128_mask ((__v4sf
*) __P
,
6310 static __inline__
void __DEFAULT_FN_ATTRS
6311 _mm256_mask_store_ps (void *__P
, __mmask8 __U
, __m256 __A
)
6313 __builtin_ia32_storeaps256_mask ((__v8sf
*) __P
,
6318 static __inline__
void __DEFAULT_FN_ATTRS
6319 _mm_mask_storeu_epi64 (void *__P
, __mmask8 __U
, __m128i __A
)
6321 __builtin_ia32_storedqudi128_mask ((__v2di
*) __P
,
6326 static __inline__
void __DEFAULT_FN_ATTRS
6327 _mm256_mask_storeu_epi64 (void *__P
, __mmask8 __U
, __m256i __A
)
6329 __builtin_ia32_storedqudi256_mask ((__v4di
*) __P
,
6334 static __inline__
void __DEFAULT_FN_ATTRS
6335 _mm_mask_storeu_epi32 (void *__P
, __mmask8 __U
, __m128i __A
)
6337 __builtin_ia32_storedqusi128_mask ((__v4si
*) __P
,
6342 static __inline__
void __DEFAULT_FN_ATTRS
6343 _mm256_mask_storeu_epi32 (void *__P
, __mmask8 __U
, __m256i __A
)
6345 __builtin_ia32_storedqusi256_mask ((__v8si
*) __P
,
6350 static __inline__
void __DEFAULT_FN_ATTRS
6351 _mm_mask_storeu_pd (void *__P
, __mmask8 __U
, __m128d __A
)
6353 __builtin_ia32_storeupd128_mask ((__v2df
*) __P
,
6358 static __inline__
void __DEFAULT_FN_ATTRS
6359 _mm256_mask_storeu_pd (void *__P
, __mmask8 __U
, __m256d __A
)
6361 __builtin_ia32_storeupd256_mask ((__v4df
*) __P
,
6366 static __inline__
void __DEFAULT_FN_ATTRS
6367 _mm_mask_storeu_ps (void *__P
, __mmask8 __U
, __m128 __A
)
6369 __builtin_ia32_storeups128_mask ((__v4sf
*) __P
,
6374 static __inline__
void __DEFAULT_FN_ATTRS
6375 _mm256_mask_storeu_ps (void *__P
, __mmask8 __U
, __m256 __A
)
6377 __builtin_ia32_storeups256_mask ((__v8sf
*) __P
,
6383 static __inline__ __m128d __DEFAULT_FN_ATTRS
6384 _mm_mask_unpackhi_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
)
6386 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
6387 (__v2df
)_mm_unpackhi_pd(__A
, __B
),
6391 static __inline__ __m128d __DEFAULT_FN_ATTRS
6392 _mm_maskz_unpackhi_pd(__mmask8 __U
, __m128d __A
, __m128d __B
)
6394 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
6395 (__v2df
)_mm_unpackhi_pd(__A
, __B
),
6396 (__v2df
)_mm_setzero_pd());
6399 static __inline__ __m256d __DEFAULT_FN_ATTRS
6400 _mm256_mask_unpackhi_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
)
6402 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
6403 (__v4df
)_mm256_unpackhi_pd(__A
, __B
),
6407 static __inline__ __m256d __DEFAULT_FN_ATTRS
6408 _mm256_maskz_unpackhi_pd(__mmask8 __U
, __m256d __A
, __m256d __B
)
6410 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
6411 (__v4df
)_mm256_unpackhi_pd(__A
, __B
),
6412 (__v4df
)_mm256_setzero_pd());
6415 static __inline__ __m128 __DEFAULT_FN_ATTRS
6416 _mm_mask_unpackhi_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
)
6418 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
6419 (__v4sf
)_mm_unpackhi_ps(__A
, __B
),
6423 static __inline__ __m128 __DEFAULT_FN_ATTRS
6424 _mm_maskz_unpackhi_ps(__mmask8 __U
, __m128 __A
, __m128 __B
)
6426 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
6427 (__v4sf
)_mm_unpackhi_ps(__A
, __B
),
6428 (__v4sf
)_mm_setzero_ps());
6431 static __inline__ __m256 __DEFAULT_FN_ATTRS
6432 _mm256_mask_unpackhi_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
)
6434 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
6435 (__v8sf
)_mm256_unpackhi_ps(__A
, __B
),
6439 static __inline__ __m256 __DEFAULT_FN_ATTRS
6440 _mm256_maskz_unpackhi_ps(__mmask8 __U
, __m256 __A
, __m256 __B
)
6442 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
6443 (__v8sf
)_mm256_unpackhi_ps(__A
, __B
),
6444 (__v8sf
)_mm256_setzero_ps());
6447 static __inline__ __m128d __DEFAULT_FN_ATTRS
6448 _mm_mask_unpacklo_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
)
6450 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
6451 (__v2df
)_mm_unpacklo_pd(__A
, __B
),
6455 static __inline__ __m128d __DEFAULT_FN_ATTRS
6456 _mm_maskz_unpacklo_pd(__mmask8 __U
, __m128d __A
, __m128d __B
)
6458 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
6459 (__v2df
)_mm_unpacklo_pd(__A
, __B
),
6460 (__v2df
)_mm_setzero_pd());
6463 static __inline__ __m256d __DEFAULT_FN_ATTRS
6464 _mm256_mask_unpacklo_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
)
6466 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
6467 (__v4df
)_mm256_unpacklo_pd(__A
, __B
),
6471 static __inline__ __m256d __DEFAULT_FN_ATTRS
6472 _mm256_maskz_unpacklo_pd(__mmask8 __U
, __m256d __A
, __m256d __B
)
6474 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
6475 (__v4df
)_mm256_unpacklo_pd(__A
, __B
),
6476 (__v4df
)_mm256_setzero_pd());
6479 static __inline__ __m128 __DEFAULT_FN_ATTRS
6480 _mm_mask_unpacklo_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
)
6482 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
6483 (__v4sf
)_mm_unpacklo_ps(__A
, __B
),
6487 static __inline__ __m128 __DEFAULT_FN_ATTRS
6488 _mm_maskz_unpacklo_ps(__mmask8 __U
, __m128 __A
, __m128 __B
)
6490 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
6491 (__v4sf
)_mm_unpacklo_ps(__A
, __B
),
6492 (__v4sf
)_mm_setzero_ps());
6495 static __inline__ __m256 __DEFAULT_FN_ATTRS
6496 _mm256_mask_unpacklo_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
)
6498 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
6499 (__v8sf
)_mm256_unpacklo_ps(__A
, __B
),
6503 static __inline__ __m256 __DEFAULT_FN_ATTRS
6504 _mm256_maskz_unpacklo_ps(__mmask8 __U
, __m256 __A
, __m256 __B
)
6506 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
6507 (__v8sf
)_mm256_unpacklo_ps(__A
, __B
),
6508 (__v8sf
)_mm256_setzero_ps());
6511 static __inline__ __m128d __DEFAULT_FN_ATTRS
6512 _mm_rcp14_pd (__m128d __A
)
6514 return (__m128d
) __builtin_ia32_rcp14pd128_mask ((__v2df
) __A
,
6520 static __inline__ __m128d __DEFAULT_FN_ATTRS
6521 _mm_mask_rcp14_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
6523 return (__m128d
) __builtin_ia32_rcp14pd128_mask ((__v2df
) __A
,
6528 static __inline__ __m128d __DEFAULT_FN_ATTRS
6529 _mm_maskz_rcp14_pd (__mmask8 __U
, __m128d __A
)
6531 return (__m128d
) __builtin_ia32_rcp14pd128_mask ((__v2df
) __A
,
6537 static __inline__ __m256d __DEFAULT_FN_ATTRS
6538 _mm256_rcp14_pd (__m256d __A
)
6540 return (__m256d
) __builtin_ia32_rcp14pd256_mask ((__v4df
) __A
,
6542 _mm256_setzero_pd (),
6546 static __inline__ __m256d __DEFAULT_FN_ATTRS
6547 _mm256_mask_rcp14_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
6549 return (__m256d
) __builtin_ia32_rcp14pd256_mask ((__v4df
) __A
,
6554 static __inline__ __m256d __DEFAULT_FN_ATTRS
6555 _mm256_maskz_rcp14_pd (__mmask8 __U
, __m256d __A
)
6557 return (__m256d
) __builtin_ia32_rcp14pd256_mask ((__v4df
) __A
,
6559 _mm256_setzero_pd (),
6563 static __inline__ __m128 __DEFAULT_FN_ATTRS
6564 _mm_rcp14_ps (__m128 __A
)
6566 return (__m128
) __builtin_ia32_rcp14ps128_mask ((__v4sf
) __A
,
6572 static __inline__ __m128 __DEFAULT_FN_ATTRS
6573 _mm_mask_rcp14_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
6575 return (__m128
) __builtin_ia32_rcp14ps128_mask ((__v4sf
) __A
,
6580 static __inline__ __m128 __DEFAULT_FN_ATTRS
6581 _mm_maskz_rcp14_ps (__mmask8 __U
, __m128 __A
)
6583 return (__m128
) __builtin_ia32_rcp14ps128_mask ((__v4sf
) __A
,
6589 static __inline__ __m256 __DEFAULT_FN_ATTRS
6590 _mm256_rcp14_ps (__m256 __A
)
6592 return (__m256
) __builtin_ia32_rcp14ps256_mask ((__v8sf
) __A
,
6594 _mm256_setzero_ps (),
6598 static __inline__ __m256 __DEFAULT_FN_ATTRS
6599 _mm256_mask_rcp14_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
6601 return (__m256
) __builtin_ia32_rcp14ps256_mask ((__v8sf
) __A
,
6606 static __inline__ __m256 __DEFAULT_FN_ATTRS
6607 _mm256_maskz_rcp14_ps (__mmask8 __U
, __m256 __A
)
6609 return (__m256
) __builtin_ia32_rcp14ps256_mask ((__v8sf
) __A
,
6611 _mm256_setzero_ps (),
/* Merge-masked form of _mm_permute_pd.
 *
 * Expands _mm_permute_pd((X), (C)) and hands the result, the mask U and the
 * pass-through vector W to __builtin_ia32_selectpd_128. Per the Intel
 * definition of this intrinsic, lanes whose mask bit is clear are taken
 * from W.
 */
#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)(__m128d)(W)); })
/* Zero-masked form of _mm_permute_pd.
 *
 * Expands _mm_permute_pd((X), (C)) and hands the result, the mask U and an
 * all-zero vector to __builtin_ia32_selectpd_128. Per the Intel definition
 * of this intrinsic, lanes whose mask bit is clear come out as zero.
 */
#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()); })
/* Merge-masked form of _mm256_permute_pd.
 *
 * Expands _mm256_permute_pd((X), (C)) and hands the result, the mask U and
 * the pass-through vector W to __builtin_ia32_selectpd_256. Per the Intel
 * definition of this intrinsic, lanes whose mask bit is clear are taken
 * from W.
 */
#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)); })
/* Zero-masked form of _mm256_permute_pd.
 *
 * Expands _mm256_permute_pd((X), (C)) and hands the result, the mask U and
 * an all-zero vector to __builtin_ia32_selectpd_256. Per the Intel
 * definition of this intrinsic, lanes whose mask bit is clear come out as
 * zero.
 */
#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()); })
/* Merge-masked form of _mm_permute_ps.
 *
 * Expands _mm_permute_ps((X), (C)) and hands the result, the mask U and the
 * pass-through vector W to __builtin_ia32_selectps_128. Per the Intel
 * definition of this intrinsic, lanes whose mask bit is clear are taken
 * from W.
 */
#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)(__m128)(W)); })
/* Zero-masked form of _mm_permute_ps.
 *
 * Expands _mm_permute_ps((X), (C)) and hands the result, the mask U and an
 * all-zero vector to __builtin_ia32_selectps_128. Per the Intel definition
 * of this intrinsic, lanes whose mask bit is clear come out as zero.
 */
#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()); })
/* Merge-masked form of _mm256_permute_ps.
 *
 * Expands _mm256_permute_ps((X), (C)) and hands the result, the mask U and
 * the pass-through vector W to __builtin_ia32_selectps_256. Per the Intel
 * definition of this intrinsic, lanes whose mask bit is clear are taken
 * from W.
 */
#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)); })
/* Zero-masked form of _mm256_permute_ps.
 *
 * Expands _mm256_permute_ps((X), (C)) and hands the result, the mask U and
 * an all-zero vector to __builtin_ia32_selectps_256. Per the Intel
 * definition of this intrinsic, lanes whose mask bit is clear come out as
 * zero.
 */
#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()); })
6655 static __inline__ __m128d __DEFAULT_FN_ATTRS
6656 _mm_mask_permutevar_pd (__m128d __W
, __mmask8 __U
, __m128d __A
,
6659 return (__m128d
) __builtin_ia32_vpermilvarpd_mask ((__v2df
) __A
,
6665 static __inline__ __m128d __DEFAULT_FN_ATTRS
6666 _mm_maskz_permutevar_pd (__mmask8 __U
, __m128d __A
, __m128i __C
)
6668 return (__m128d
) __builtin_ia32_vpermilvarpd_mask ((__v2df
) __A
,
6675 static __inline__ __m256d __DEFAULT_FN_ATTRS
6676 _mm256_mask_permutevar_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
6679 return (__m256d
) __builtin_ia32_vpermilvarpd256_mask ((__v4df
) __A
,
6686 static __inline__ __m256d __DEFAULT_FN_ATTRS
6687 _mm256_maskz_permutevar_pd (__mmask8 __U
, __m256d __A
, __m256i __C
)
6689 return (__m256d
) __builtin_ia32_vpermilvarpd256_mask ((__v4df
) __A
,
6692 _mm256_setzero_pd (),
6697 static __inline__ __m128 __DEFAULT_FN_ATTRS
6698 _mm_mask_permutevar_ps (__m128 __W
, __mmask8 __U
, __m128 __A
,
6701 return (__m128
) __builtin_ia32_vpermilvarps_mask ((__v4sf
) __A
,
6707 static __inline__ __m128 __DEFAULT_FN_ATTRS
6708 _mm_maskz_permutevar_ps (__mmask8 __U
, __m128 __A
, __m128i __C
)
6710 return (__m128
) __builtin_ia32_vpermilvarps_mask ((__v4sf
) __A
,
6717 static __inline__ __m256 __DEFAULT_FN_ATTRS
6718 _mm256_mask_permutevar_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
6721 return (__m256
) __builtin_ia32_vpermilvarps256_mask ((__v8sf
) __A
,
6727 static __inline__ __m256 __DEFAULT_FN_ATTRS
6728 _mm256_maskz_permutevar_ps (__mmask8 __U
, __m256 __A
, __m256i __C
)
6730 return (__m256
) __builtin_ia32_vpermilvarps256_mask ((__v8sf
) __A
,
6733 _mm256_setzero_ps (),
6737 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6738 _mm_test_epi32_mask (__m128i __A
, __m128i __B
)
6740 return (__mmask8
) __builtin_ia32_ptestmd128 ((__v4si
) __A
,
6745 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6746 _mm_mask_test_epi32_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6748 return (__mmask8
) __builtin_ia32_ptestmd128 ((__v4si
) __A
,
6752 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6753 _mm256_test_epi32_mask (__m256i __A
, __m256i __B
)
6755 return (__mmask8
) __builtin_ia32_ptestmd256 ((__v8si
) __A
,
6760 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6761 _mm256_mask_test_epi32_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6763 return (__mmask8
) __builtin_ia32_ptestmd256 ((__v8si
) __A
,
6767 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6768 _mm_test_epi64_mask (__m128i __A
, __m128i __B
)
6770 return (__mmask8
) __builtin_ia32_ptestmq128 ((__v2di
) __A
,
6775 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6776 _mm_mask_test_epi64_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6778 return (__mmask8
) __builtin_ia32_ptestmq128 ((__v2di
) __A
,
6782 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6783 _mm256_test_epi64_mask (__m256i __A
, __m256i __B
)
6785 return (__mmask8
) __builtin_ia32_ptestmq256 ((__v4di
) __A
,
6790 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6791 _mm256_mask_test_epi64_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6793 return (__mmask8
) __builtin_ia32_ptestmq256 ((__v4di
) __A
,
6797 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6798 _mm_testn_epi32_mask (__m128i __A
, __m128i __B
)
6800 return (__mmask8
) __builtin_ia32_ptestnmd128 ((__v4si
) __A
,
6805 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6806 _mm_mask_testn_epi32_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6808 return (__mmask8
) __builtin_ia32_ptestnmd128 ((__v4si
) __A
,
6812 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6813 _mm256_testn_epi32_mask (__m256i __A
, __m256i __B
)
6815 return (__mmask8
) __builtin_ia32_ptestnmd256 ((__v8si
) __A
,
6820 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6821 _mm256_mask_testn_epi32_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6823 return (__mmask8
) __builtin_ia32_ptestnmd256 ((__v8si
) __A
,
6827 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6828 _mm_testn_epi64_mask (__m128i __A
, __m128i __B
)
6830 return (__mmask8
) __builtin_ia32_ptestnmq128 ((__v2di
) __A
,
6835 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6836 _mm_mask_testn_epi64_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6838 return (__mmask8
) __builtin_ia32_ptestnmq128 ((__v2di
) __A
,
6842 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6843 _mm256_testn_epi64_mask (__m256i __A
, __m256i __B
)
6845 return (__mmask8
) __builtin_ia32_ptestnmq256 ((__v4di
) __A
,
6850 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6851 _mm256_mask_testn_epi64_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6853 return (__mmask8
) __builtin_ia32_ptestnmq256 ((__v4di
) __A
,
6859 static __inline__ __m128i __DEFAULT_FN_ATTRS
6860 _mm_mask_unpackhi_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6862 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6863 (__v4si
)_mm_unpackhi_epi32(__A
, __B
),
6867 static __inline__ __m128i __DEFAULT_FN_ATTRS
6868 _mm_maskz_unpackhi_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
6870 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6871 (__v4si
)_mm_unpackhi_epi32(__A
, __B
),
6872 (__v4si
)_mm_setzero_si128());
6875 static __inline__ __m256i __DEFAULT_FN_ATTRS
6876 _mm256_mask_unpackhi_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6878 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6879 (__v8si
)_mm256_unpackhi_epi32(__A
, __B
),
6883 static __inline__ __m256i __DEFAULT_FN_ATTRS
6884 _mm256_maskz_unpackhi_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
6886 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6887 (__v8si
)_mm256_unpackhi_epi32(__A
, __B
),
6888 (__v8si
)_mm256_setzero_si256());
6891 static __inline__ __m128i __DEFAULT_FN_ATTRS
6892 _mm_mask_unpackhi_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6894 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6895 (__v2di
)_mm_unpackhi_epi64(__A
, __B
),
6899 static __inline__ __m128i __DEFAULT_FN_ATTRS
6900 _mm_maskz_unpackhi_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
6902 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6903 (__v2di
)_mm_unpackhi_epi64(__A
, __B
),
6904 (__v2di
)_mm_setzero_di());
6907 static __inline__ __m256i __DEFAULT_FN_ATTRS
6908 _mm256_mask_unpackhi_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6910 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6911 (__v4di
)_mm256_unpackhi_epi64(__A
, __B
),
6915 static __inline__ __m256i __DEFAULT_FN_ATTRS
6916 _mm256_maskz_unpackhi_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
6918 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6919 (__v4di
)_mm256_unpackhi_epi64(__A
, __B
),
6920 (__v4di
)_mm256_setzero_si256());
6923 static __inline__ __m128i __DEFAULT_FN_ATTRS
6924 _mm_mask_unpacklo_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6926 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6927 (__v4si
)_mm_unpacklo_epi32(__A
, __B
),
6931 static __inline__ __m128i __DEFAULT_FN_ATTRS
6932 _mm_maskz_unpacklo_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
6934 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6935 (__v4si
)_mm_unpacklo_epi32(__A
, __B
),
6936 (__v4si
)_mm_setzero_si128());
6939 static __inline__ __m256i __DEFAULT_FN_ATTRS
6940 _mm256_mask_unpacklo_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6942 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6943 (__v8si
)_mm256_unpacklo_epi32(__A
, __B
),
6947 static __inline__ __m256i __DEFAULT_FN_ATTRS
6948 _mm256_maskz_unpacklo_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
6950 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6951 (__v8si
)_mm256_unpacklo_epi32(__A
, __B
),
6952 (__v8si
)_mm256_setzero_si256());
6955 static __inline__ __m128i __DEFAULT_FN_ATTRS
6956 _mm_mask_unpacklo_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6958 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6959 (__v2di
)_mm_unpacklo_epi64(__A
, __B
),
6963 static __inline__ __m128i __DEFAULT_FN_ATTRS
6964 _mm_maskz_unpacklo_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
6966 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6967 (__v2di
)_mm_unpacklo_epi64(__A
, __B
),
6968 (__v2di
)_mm_setzero_di());
6971 static __inline__ __m256i __DEFAULT_FN_ATTRS
6972 _mm256_mask_unpacklo_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6974 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6975 (__v4di
)_mm256_unpacklo_epi64(__A
, __B
),
6979 static __inline__ __m256i __DEFAULT_FN_ATTRS
6980 _mm256_maskz_unpacklo_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
6982 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6983 (__v4di
)_mm256_unpacklo_epi64(__A
, __B
),
6984 (__v4di
)_mm256_setzero_si256());
6987 static __inline__ __m128i __DEFAULT_FN_ATTRS
6988 _mm_mask_sra_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
,
6991 return (__m128i
) __builtin_ia32_psrad128_mask ((__v4si
) __A
,
6997 static __inline__ __m128i __DEFAULT_FN_ATTRS
6998 _mm_maskz_sra_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
7000 return (__m128i
) __builtin_ia32_psrad128_mask ((__v4si
) __A
,
7003 _mm_setzero_si128 (),
7007 static __inline__ __m256i __DEFAULT_FN_ATTRS
7008 _mm256_mask_sra_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
,
7011 return (__m256i
) __builtin_ia32_psrad256_mask ((__v8si
) __A
,
7017 static __inline__ __m256i __DEFAULT_FN_ATTRS
7018 _mm256_maskz_sra_epi32 (__mmask8 __U
, __m256i __A
, __m128i __B
)
7020 return (__m256i
) __builtin_ia32_psrad256_mask ((__v8si
) __A
,
7023 _mm256_setzero_si256 (),
7027 #define _mm_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
7028 (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \
7029 (__v4si)(__m128i)(W), \
7032 #define _mm_maskz_srai_epi32(U, A, imm) __extension__ ({ \
7033 (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \
7034 (__v4si)_mm_setzero_si128(), \
7037 #define _mm256_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
7038 (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \
7039 (__v8si)(__m256i)(W), \
7042 #define _mm256_maskz_srai_epi32(U, A, imm) __extension__ ({ \
7043 (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \
7044 (__v8si)_mm256_setzero_si256(), \
7047 static __inline__ __m128i __DEFAULT_FN_ATTRS
7048 _mm_sra_epi64 (__m128i __A
, __m128i __B
)
7050 return (__m128i
) __builtin_ia32_psraq128_mask ((__v2di
) __A
,
7057 static __inline__ __m128i __DEFAULT_FN_ATTRS
7058 _mm_mask_sra_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
,
7061 return (__m128i
) __builtin_ia32_psraq128_mask ((__v2di
) __A
,
7067 static __inline__ __m128i __DEFAULT_FN_ATTRS
7068 _mm_maskz_sra_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
7070 return (__m128i
) __builtin_ia32_psraq128_mask ((__v2di
) __A
,
7077 static __inline__ __m256i __DEFAULT_FN_ATTRS
7078 _mm256_sra_epi64 (__m256i __A
, __m128i __B
)
7080 return (__m256i
) __builtin_ia32_psraq256_mask ((__v4di
) __A
,
7083 _mm256_setzero_si256 (),
7087 static __inline__ __m256i __DEFAULT_FN_ATTRS
7088 _mm256_mask_sra_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
,
7091 return (__m256i
) __builtin_ia32_psraq256_mask ((__v4di
) __A
,
7097 static __inline__ __m256i __DEFAULT_FN_ATTRS
7098 _mm256_maskz_sra_epi64 (__mmask8 __U
, __m256i __A
, __m128i __B
)
7100 return (__m256i
) __builtin_ia32_psraq256_mask ((__v4di
) __A
,
7103 _mm256_setzero_si256 (),
7107 #define _mm_srai_epi64(A, imm) __extension__ ({ \
7108 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
7109 (__v2di)_mm_setzero_di(), \
7112 #define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
7113 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
7114 (__v2di)(__m128i)(W), \
7117 #define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \
7118 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
7119 (__v2di)_mm_setzero_si128(), \
7122 #define _mm256_srai_epi64(A, imm) __extension__ ({ \
7123 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
7124 (__v4di)_mm256_setzero_si256(), \
7127 #define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
7128 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
7129 (__v4di)(__m256i)(W), \
7132 #define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \
7133 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
7134 (__v4di)_mm256_setzero_si256(), \
7137 #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
7138 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
7139 (__v4si)(__m128i)(B), \
7140 (__v4si)(__m128i)(C), (int)(imm), \
7143 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
7144 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
7145 (__v4si)(__m128i)(B), \
7146 (__v4si)(__m128i)(C), (int)(imm), \
7149 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
7150 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
7151 (__v4si)(__m128i)(B), \
7152 (__v4si)(__m128i)(C), (int)(imm), \
7155 #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
7156 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
7157 (__v8si)(__m256i)(B), \
7158 (__v8si)(__m256i)(C), (int)(imm), \
7161 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
7162 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
7163 (__v8si)(__m256i)(B), \
7164 (__v8si)(__m256i)(C), (int)(imm), \
7167 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
7168 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
7169 (__v8si)(__m256i)(B), \
7170 (__v8si)(__m256i)(C), (int)(imm), \
7173 #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
7174 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
7175 (__v2di)(__m128i)(B), \
7176 (__v2di)(__m128i)(C), (int)(imm), \
7179 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
7180 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
7181 (__v2di)(__m128i)(B), \
7182 (__v2di)(__m128i)(C), (int)(imm), \
7185 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
7186 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
7187 (__v2di)(__m128i)(B), \
7188 (__v2di)(__m128i)(C), (int)(imm), \
7191 #define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
7192 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
7193 (__v4di)(__m256i)(B), \
7194 (__v4di)(__m256i)(C), (int)(imm), \
7197 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
7198 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
7199 (__v4di)(__m256i)(B), \
7200 (__v4di)(__m256i)(C), (int)(imm), \
7203 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
7204 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
7205 (__v4di)(__m256i)(B), \
7206 (__v4di)(__m256i)(C), (int)(imm), \
7211 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
7212 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
7213 (__v8sf)(__m256)(B), (int)(imm), \
7214 (__v8sf)_mm256_setzero_ps(), \
7217 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
7218 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
7219 (__v8sf)(__m256)(B), (int)(imm), \
7220 (__v8sf)(__m256)(W), \
7223 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
7224 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
7225 (__v8sf)(__m256)(B), (int)(imm), \
7226 (__v8sf)_mm256_setzero_ps(), \
7229 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
7230 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
7231 (__v4df)(__m256d)(B), \
7233 (__v4df)_mm256_setzero_pd(), \
7236 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
7237 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
7238 (__v4df)(__m256d)(B), \
7240 (__v4df)(__m256d)(W), \
7243 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
7244 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
7245 (__v4df)(__m256d)(B), \
7247 (__v4df)_mm256_setzero_pd(), \
7250 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
7251 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
7252 (__v8si)(__m256i)(B), \
7254 (__v8si)_mm256_setzero_si256(), \
7257 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
7258 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
7259 (__v8si)(__m256i)(B), \
7261 (__v8si)(__m256i)(W), \
7264 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
7265 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
7266 (__v8si)(__m256i)(B), \
7268 (__v8si)_mm256_setzero_si256(), \
7271 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
7272 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
7273 (__v4di)(__m256i)(B), \
7275 (__v4di)_mm256_setzero_si256(), \
7278 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
7279 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
7280 (__v4di)(__m256i)(B), \
7282 (__v4di)(__m256i)(W), \
7285 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
7286 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
7287 (__v4di)(__m256i)(B), \
7289 (__v4di)_mm256_setzero_si256(), \
/* Masked form of _mm_shuffle_pd for 128-bit double-precision vectors.
 * Expands to __builtin_ia32_selectpd_128 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm_shuffle_pd(A, B, M), viewed as __v2df;
 *   - W, converted to __m128d and viewed as __v2df.
 * The value of the expansion is cast to __m128d.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps the corresponding element of W -- confirm against the
 * select builtin's definition.
 * Each parameter appears once in this macro's own expansion. */
7292 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7293 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
7294 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
7295 (__v2df)(__m128d)(W)); })
/* Zero-fallback masked form of _mm_shuffle_pd for 128-bit double-precision
 * vectors.
 * Expands to __builtin_ia32_selectpd_128 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm_shuffle_pd(A, B, M), viewed as __v2df;
 *   - the value of _mm_setzero_pd(), viewed as __v2df.
 * The value of the expansion is cast to __m128d.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit yields the element from the _mm_setzero_pd() operand -- confirm
 * against the select builtin's definition. */
7297 #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7298 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
7299 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
7300 (__v2df)_mm_setzero_pd()); })
/* Masked form of _mm256_shuffle_pd for 256-bit double-precision vectors.
 * Expands to __builtin_ia32_selectpd_256 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm256_shuffle_pd(A, B, M), viewed as __v4df;
 *   - W, converted to __m256d and viewed as __v4df.
 * The value of the expansion is cast to __m256d.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps the corresponding element of W -- confirm against the
 * select builtin's definition. */
7302 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7303 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7304 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
7305 (__v4df)(__m256d)(W)); })
/* Zero-fallback masked form of _mm256_shuffle_pd for 256-bit
 * double-precision vectors.
 * Expands to __builtin_ia32_selectpd_256 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm256_shuffle_pd(A, B, M), viewed as __v4df;
 *   - the value of _mm256_setzero_pd(), viewed as __v4df.
 * The value of the expansion is cast to __m256d.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit yields the element from the _mm256_setzero_pd() operand --
 * confirm against the select builtin's definition. */
7307 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7308 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7309 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
7310 (__v4df)_mm256_setzero_pd()); })
/* Masked form of _mm_shuffle_ps for 128-bit single-precision vectors.
 * Expands to __builtin_ia32_selectps_128 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm_shuffle_ps(A, B, M), viewed as __v4sf;
 *   - W, converted to __m128 and viewed as __v4sf.
 * The value of the expansion is cast to __m128.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps the corresponding element of W -- confirm against the
 * select builtin's definition. */
7312 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7313 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7314 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
7315 (__v4sf)(__m128)(W)); })
/* Zero-fallback masked form of _mm_shuffle_ps for 128-bit single-precision
 * vectors.
 * Expands to __builtin_ia32_selectps_128 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm_shuffle_ps(A, B, M), viewed as __v4sf;
 *   - the value of _mm_setzero_ps(), viewed as __v4sf.
 * The value of the expansion is cast to __m128.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit yields the element from the _mm_setzero_ps() operand -- confirm
 * against the select builtin's definition. */
7317 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7318 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7319 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
7320 (__v4sf)_mm_setzero_ps()); })
/* Masked form of _mm256_shuffle_ps for 256-bit single-precision vectors.
 * Expands to __builtin_ia32_selectps_256 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm256_shuffle_ps(A, B, M), viewed as __v8sf;
 *   - W, converted to __m256 and viewed as __v8sf.
 * The value of the expansion is cast to __m256.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps the corresponding element of W -- confirm against the
 * select builtin's definition. */
7322 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7323 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7324 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
7325 (__v8sf)(__m256)(W)); })
/* Zero-fallback masked form of _mm256_shuffle_ps for 256-bit
 * single-precision vectors.
 * Expands to __builtin_ia32_selectps_256 applied to:
 *   - U, cast to __mmask8;
 *   - the result of _mm256_shuffle_ps(A, B, M), viewed as __v8sf;
 *   - the value of _mm256_setzero_ps(), viewed as __v8sf.
 * The value of the expansion is cast to __m256.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit yields the element from the _mm256_setzero_ps() operand --
 * confirm against the select builtin's definition. */
7327 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7328 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7329 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
7330 (__v8sf)_mm256_setzero_ps()); })
7332 static __inline__ __m128d __DEFAULT_FN_ATTRS
7333 _mm_rsqrt14_pd (__m128d __A
)
7335 return (__m128d
) __builtin_ia32_rsqrt14pd128_mask ((__v2df
) __A
,
7341 static __inline__ __m128d __DEFAULT_FN_ATTRS
7342 _mm_mask_rsqrt14_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
7344 return (__m128d
) __builtin_ia32_rsqrt14pd128_mask ((__v2df
) __A
,
7349 static __inline__ __m128d __DEFAULT_FN_ATTRS
7350 _mm_maskz_rsqrt14_pd (__mmask8 __U
, __m128d __A
)
7352 return (__m128d
) __builtin_ia32_rsqrt14pd128_mask ((__v2df
) __A
,
7358 static __inline__ __m256d __DEFAULT_FN_ATTRS
7359 _mm256_rsqrt14_pd (__m256d __A
)
7361 return (__m256d
) __builtin_ia32_rsqrt14pd256_mask ((__v4df
) __A
,
7363 _mm256_setzero_pd (),
7367 static __inline__ __m256d __DEFAULT_FN_ATTRS
7368 _mm256_mask_rsqrt14_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
7370 return (__m256d
) __builtin_ia32_rsqrt14pd256_mask ((__v4df
) __A
,
7375 static __inline__ __m256d __DEFAULT_FN_ATTRS
7376 _mm256_maskz_rsqrt14_pd (__mmask8 __U
, __m256d __A
)
7378 return (__m256d
) __builtin_ia32_rsqrt14pd256_mask ((__v4df
) __A
,
7380 _mm256_setzero_pd (),
7384 static __inline__ __m128 __DEFAULT_FN_ATTRS
7385 _mm_rsqrt14_ps (__m128 __A
)
7387 return (__m128
) __builtin_ia32_rsqrt14ps128_mask ((__v4sf
) __A
,
7393 static __inline__ __m128 __DEFAULT_FN_ATTRS
7394 _mm_mask_rsqrt14_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
7396 return (__m128
) __builtin_ia32_rsqrt14ps128_mask ((__v4sf
) __A
,
7401 static __inline__ __m128 __DEFAULT_FN_ATTRS
7402 _mm_maskz_rsqrt14_ps (__mmask8 __U
, __m128 __A
)
7404 return (__m128
) __builtin_ia32_rsqrt14ps128_mask ((__v4sf
) __A
,
7410 static __inline__ __m256 __DEFAULT_FN_ATTRS
7411 _mm256_rsqrt14_ps (__m256 __A
)
7413 return (__m256
) __builtin_ia32_rsqrt14ps256_mask ((__v8sf
) __A
,
7415 _mm256_setzero_ps (),
7419 static __inline__ __m256 __DEFAULT_FN_ATTRS
7420 _mm256_mask_rsqrt14_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
7422 return (__m256
) __builtin_ia32_rsqrt14ps256_mask ((__v8sf
) __A
,
7427 static __inline__ __m256 __DEFAULT_FN_ATTRS
7428 _mm256_maskz_rsqrt14_ps (__mmask8 __U
, __m256 __A
)
7430 return (__m256
) __builtin_ia32_rsqrt14ps256_mask ((__v8sf
) __A
,
7432 _mm256_setzero_ps (),
7436 static __inline__ __m256 __DEFAULT_FN_ATTRS
7437 _mm256_broadcast_f32x4 (__m128 __A
)
7439 return (__m256
) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf
) __A
,
7440 (__v8sf
)_mm256_undefined_pd (),
7444 static __inline__ __m256 __DEFAULT_FN_ATTRS
7445 _mm256_mask_broadcast_f32x4 (__m256 __O
, __mmask8 __M
, __m128 __A
)
7447 return (__m256
) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf
) __A
,
7452 static __inline__ __m256 __DEFAULT_FN_ATTRS
7453 _mm256_maskz_broadcast_f32x4 (__mmask8 __M
, __m128 __A
)
7455 return (__m256
) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf
) __A
,
7456 (__v8sf
) _mm256_setzero_ps (),
7460 static __inline__ __m256i __DEFAULT_FN_ATTRS
7461 _mm256_broadcast_i32x4 (__m128i __A
)
7463 return (__m256i
) __builtin_ia32_broadcasti32x4_256_mask ((__v4si
) __A
,
7464 (__v8si
)_mm256_undefined_si256 (),
7468 static __inline__ __m256i __DEFAULT_FN_ATTRS
7469 _mm256_mask_broadcast_i32x4 (__m256i __O
, __mmask8 __M
, __m128i __A
)
7471 return (__m256i
) __builtin_ia32_broadcasti32x4_256_mask ((__v4si
) __A
,
7476 static __inline__ __m256i __DEFAULT_FN_ATTRS
7477 _mm256_maskz_broadcast_i32x4 (__mmask8 __M
, __m128i __A
)
7479 return (__m256i
) __builtin_ia32_broadcasti32x4_256_mask ((__v4si
)
7481 (__v8si
) _mm256_setzero_si256 (),
7485 static __inline__ __m256d __DEFAULT_FN_ATTRS
7486 _mm256_mask_broadcastsd_pd (__m256d __O
, __mmask8 __M
, __m128d __A
)
7488 return (__m256d
)__builtin_ia32_selectpd_256(__M
,
7489 (__v4df
) _mm256_broadcastsd_pd(__A
),
7493 static __inline__ __m256d __DEFAULT_FN_ATTRS
7494 _mm256_maskz_broadcastsd_pd (__mmask8 __M
, __m128d __A
)
7496 return (__m256d
)__builtin_ia32_selectpd_256(__M
,
7497 (__v4df
) _mm256_broadcastsd_pd(__A
),
7498 (__v4df
) _mm256_setzero_pd());
7501 static __inline__ __m128 __DEFAULT_FN_ATTRS
7502 _mm_mask_broadcastss_ps (__m128 __O
, __mmask8 __M
, __m128 __A
)
7504 return (__m128
)__builtin_ia32_selectps_128(__M
,
7505 (__v4sf
) _mm_broadcastss_ps(__A
),
7509 static __inline__ __m128 __DEFAULT_FN_ATTRS
7510 _mm_maskz_broadcastss_ps (__mmask8 __M
, __m128 __A
)
7512 return (__m128
)__builtin_ia32_selectps_128(__M
,
7513 (__v4sf
) _mm_broadcastss_ps(__A
),
7514 (__v4sf
) _mm_setzero_ps());
7517 static __inline__ __m256 __DEFAULT_FN_ATTRS
7518 _mm256_mask_broadcastss_ps (__m256 __O
, __mmask8 __M
, __m128 __A
)
7520 return (__m256
)__builtin_ia32_selectps_256(__M
,
7521 (__v8sf
) _mm256_broadcastss_ps(__A
),
7525 static __inline__ __m256 __DEFAULT_FN_ATTRS
7526 _mm256_maskz_broadcastss_ps (__mmask8 __M
, __m128 __A
)
7528 return (__m256
)__builtin_ia32_selectps_256(__M
,
7529 (__v8sf
) _mm256_broadcastss_ps(__A
),
7530 (__v8sf
) _mm256_setzero_ps());
7533 static __inline__ __m128i __DEFAULT_FN_ATTRS
7534 _mm_mask_broadcastd_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7536 return (__m128i
)__builtin_ia32_selectd_128(__M
,
7537 (__v4si
) _mm_broadcastd_epi32(__A
),
7541 static __inline__ __m128i __DEFAULT_FN_ATTRS
7542 _mm_maskz_broadcastd_epi32 (__mmask8 __M
, __m128i __A
)
7544 return (__m128i
)__builtin_ia32_selectd_128(__M
,
7545 (__v4si
) _mm_broadcastd_epi32(__A
),
7546 (__v4si
) _mm_setzero_si128());
7549 static __inline__ __m256i __DEFAULT_FN_ATTRS
7550 _mm256_mask_broadcastd_epi32 (__m256i __O
, __mmask8 __M
, __m128i __A
)
7552 return (__m256i
)__builtin_ia32_selectd_256(__M
,
7553 (__v8si
) _mm256_broadcastd_epi32(__A
),
7557 static __inline__ __m256i __DEFAULT_FN_ATTRS
7558 _mm256_maskz_broadcastd_epi32 (__mmask8 __M
, __m128i __A
)
7560 return (__m256i
)__builtin_ia32_selectd_256(__M
,
7561 (__v8si
) _mm256_broadcastd_epi32(__A
),
7562 (__v8si
) _mm256_setzero_si256());
7565 static __inline__ __m128i __DEFAULT_FN_ATTRS
7566 _mm_mask_broadcastq_epi64 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7568 return (__m128i
)__builtin_ia32_selectq_128(__M
,
7569 (__v2di
) _mm_broadcastq_epi64(__A
),
7573 static __inline__ __m128i __DEFAULT_FN_ATTRS
7574 _mm_maskz_broadcastq_epi64 (__mmask8 __M
, __m128i __A
)
7576 return (__m128i
)__builtin_ia32_selectq_128(__M
,
7577 (__v2di
) _mm_broadcastq_epi64(__A
),
7578 (__v2di
) _mm_setzero_si128());
7581 static __inline__ __m256i __DEFAULT_FN_ATTRS
7582 _mm256_mask_broadcastq_epi64 (__m256i __O
, __mmask8 __M
, __m128i __A
)
7584 return (__m256i
)__builtin_ia32_selectq_256(__M
,
7585 (__v4di
) _mm256_broadcastq_epi64(__A
),
7589 static __inline__ __m256i __DEFAULT_FN_ATTRS
7590 _mm256_maskz_broadcastq_epi64 (__mmask8 __M
, __m128i __A
)
7592 return (__m256i
)__builtin_ia32_selectq_256(__M
,
7593 (__v4di
) _mm256_broadcastq_epi64(__A
),
7594 (__v4di
) _mm256_setzero_si256());
7597 static __inline__ __m128i __DEFAULT_FN_ATTRS
7598 _mm_cvtsepi32_epi8 (__m128i __A
)
7600 return (__m128i
) __builtin_ia32_pmovsdb128_mask ((__v4si
) __A
,
7601 (__v16qi
)_mm_undefined_si128(),
7605 static __inline__ __m128i __DEFAULT_FN_ATTRS
7606 _mm_mask_cvtsepi32_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7608 return (__m128i
) __builtin_ia32_pmovsdb128_mask ((__v4si
) __A
,
7609 (__v16qi
) __O
, __M
);
7612 static __inline__ __m128i __DEFAULT_FN_ATTRS
7613 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M
, __m128i __A
)
7615 return (__m128i
) __builtin_ia32_pmovsdb128_mask ((__v4si
) __A
,
7616 (__v16qi
) _mm_setzero_si128 (),
7620 static __inline__
void __DEFAULT_FN_ATTRS
7621 _mm_mask_cvtsepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7623 __builtin_ia32_pmovsdb128mem_mask ((__v16qi
*) __P
, (__v4si
) __A
, __M
);
7626 static __inline__ __m128i __DEFAULT_FN_ATTRS
7627 _mm256_cvtsepi32_epi8 (__m256i __A
)
7629 return (__m128i
) __builtin_ia32_pmovsdb256_mask ((__v8si
) __A
,
7630 (__v16qi
)_mm_undefined_si128(),
7634 static __inline__ __m128i __DEFAULT_FN_ATTRS
7635 _mm256_mask_cvtsepi32_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7637 return (__m128i
) __builtin_ia32_pmovsdb256_mask ((__v8si
) __A
,
7638 (__v16qi
) __O
, __M
);
7641 static __inline__ __m128i __DEFAULT_FN_ATTRS
7642 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M
, __m256i __A
)
7644 return (__m128i
) __builtin_ia32_pmovsdb256_mask ((__v8si
) __A
,
7645 (__v16qi
) _mm_setzero_si128 (),
7649 static __inline__
void __DEFAULT_FN_ATTRS
7650 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7652 __builtin_ia32_pmovsdb256mem_mask ((__v16qi
*) __P
, (__v8si
) __A
, __M
);
7655 static __inline__ __m128i __DEFAULT_FN_ATTRS
7656 _mm_cvtsepi32_epi16 (__m128i __A
)
7658 return (__m128i
) __builtin_ia32_pmovsdw128_mask ((__v4si
) __A
,
7659 (__v8hi
)_mm_setzero_si128 (),
7663 static __inline__ __m128i __DEFAULT_FN_ATTRS
7664 _mm_mask_cvtsepi32_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7666 return (__m128i
) __builtin_ia32_pmovsdw128_mask ((__v4si
) __A
,
7671 static __inline__ __m128i __DEFAULT_FN_ATTRS
7672 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M
, __m128i __A
)
7674 return (__m128i
) __builtin_ia32_pmovsdw128_mask ((__v4si
) __A
,
7675 (__v8hi
) _mm_setzero_si128 (),
7679 static __inline__
void __DEFAULT_FN_ATTRS
7680 _mm_mask_cvtsepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7682 __builtin_ia32_pmovsdw128mem_mask ((__v8hi
*) __P
, (__v4si
) __A
, __M
);
7685 static __inline__ __m128i __DEFAULT_FN_ATTRS
7686 _mm256_cvtsepi32_epi16 (__m256i __A
)
7688 return (__m128i
) __builtin_ia32_pmovsdw256_mask ((__v8si
) __A
,
7689 (__v8hi
)_mm_undefined_si128(),
7693 static __inline__ __m128i __DEFAULT_FN_ATTRS
7694 _mm256_mask_cvtsepi32_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7696 return (__m128i
) __builtin_ia32_pmovsdw256_mask ((__v8si
) __A
,
7700 static __inline__ __m128i __DEFAULT_FN_ATTRS
7701 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M
, __m256i __A
)
7703 return (__m128i
) __builtin_ia32_pmovsdw256_mask ((__v8si
) __A
,
7704 (__v8hi
) _mm_setzero_si128 (),
7708 static __inline__
void __DEFAULT_FN_ATTRS
7709 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7711 __builtin_ia32_pmovsdw256mem_mask ((__v8hi
*) __P
, (__v8si
) __A
, __M
);
7714 static __inline__ __m128i __DEFAULT_FN_ATTRS
7715 _mm_cvtsepi64_epi8 (__m128i __A
)
7717 return (__m128i
) __builtin_ia32_pmovsqb128_mask ((__v2di
) __A
,
7718 (__v16qi
)_mm_undefined_si128(),
7722 static __inline__ __m128i __DEFAULT_FN_ATTRS
7723 _mm_mask_cvtsepi64_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7725 return (__m128i
) __builtin_ia32_pmovsqb128_mask ((__v2di
) __A
,
7726 (__v16qi
) __O
, __M
);
7729 static __inline__ __m128i __DEFAULT_FN_ATTRS
7730 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M
, __m128i __A
)
7732 return (__m128i
) __builtin_ia32_pmovsqb128_mask ((__v2di
) __A
,
7733 (__v16qi
) _mm_setzero_si128 (),
7737 static __inline__
void __DEFAULT_FN_ATTRS
7738 _mm_mask_cvtsepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7740 __builtin_ia32_pmovsqb128mem_mask ((__v16qi
*) __P
, (__v2di
) __A
, __M
);
7743 static __inline__ __m128i __DEFAULT_FN_ATTRS
7744 _mm256_cvtsepi64_epi8 (__m256i __A
)
7746 return (__m128i
) __builtin_ia32_pmovsqb256_mask ((__v4di
) __A
,
7747 (__v16qi
)_mm_undefined_si128(),
7751 static __inline__ __m128i __DEFAULT_FN_ATTRS
7752 _mm256_mask_cvtsepi64_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7754 return (__m128i
) __builtin_ia32_pmovsqb256_mask ((__v4di
) __A
,
7755 (__v16qi
) __O
, __M
);
7758 static __inline__ __m128i __DEFAULT_FN_ATTRS
7759 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M
, __m256i __A
)
7761 return (__m128i
) __builtin_ia32_pmovsqb256_mask ((__v4di
) __A
,
7762 (__v16qi
) _mm_setzero_si128 (),
7766 static __inline__
void __DEFAULT_FN_ATTRS
7767 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7769 __builtin_ia32_pmovsqb256mem_mask ((__v16qi
*) __P
, (__v4di
) __A
, __M
);
7772 static __inline__ __m128i __DEFAULT_FN_ATTRS
7773 _mm_cvtsepi64_epi32 (__m128i __A
)
7775 return (__m128i
) __builtin_ia32_pmovsqd128_mask ((__v2di
) __A
,
7776 (__v4si
)_mm_undefined_si128(),
7780 static __inline__ __m128i __DEFAULT_FN_ATTRS
7781 _mm_mask_cvtsepi64_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7783 return (__m128i
) __builtin_ia32_pmovsqd128_mask ((__v2di
) __A
,
7787 static __inline__ __m128i __DEFAULT_FN_ATTRS
7788 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M
, __m128i __A
)
7790 return (__m128i
) __builtin_ia32_pmovsqd128_mask ((__v2di
) __A
,
7791 (__v4si
) _mm_setzero_si128 (),
7795 static __inline__
void __DEFAULT_FN_ATTRS
7796 _mm_mask_cvtsepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m128i __A
)
7798 __builtin_ia32_pmovsqd128mem_mask ((__v4si
*) __P
, (__v2di
) __A
, __M
);
7801 static __inline__ __m128i __DEFAULT_FN_ATTRS
7802 _mm256_cvtsepi64_epi32 (__m256i __A
)
7804 return (__m128i
) __builtin_ia32_pmovsqd256_mask ((__v4di
) __A
,
7805 (__v4si
)_mm_undefined_si128(),
7809 static __inline__ __m128i __DEFAULT_FN_ATTRS
7810 _mm256_mask_cvtsepi64_epi32 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7812 return (__m128i
) __builtin_ia32_pmovsqd256_mask ((__v4di
) __A
,
7817 static __inline__ __m128i __DEFAULT_FN_ATTRS
7818 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M
, __m256i __A
)
7820 return (__m128i
) __builtin_ia32_pmovsqd256_mask ((__v4di
) __A
,
7821 (__v4si
) _mm_setzero_si128 (),
7825 static __inline__
void __DEFAULT_FN_ATTRS
7826 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m256i __A
)
7828 __builtin_ia32_pmovsqd256mem_mask ((__v4si
*) __P
, (__v4di
) __A
, __M
);
7831 static __inline__ __m128i __DEFAULT_FN_ATTRS
7832 _mm_cvtsepi64_epi16 (__m128i __A
)
7834 return (__m128i
) __builtin_ia32_pmovsqw128_mask ((__v2di
) __A
,
7835 (__v8hi
)_mm_undefined_si128(),
7839 static __inline__ __m128i __DEFAULT_FN_ATTRS
7840 _mm_mask_cvtsepi64_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7842 return (__m128i
) __builtin_ia32_pmovsqw128_mask ((__v2di
) __A
,
7846 static __inline__ __m128i __DEFAULT_FN_ATTRS
7847 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M
, __m128i __A
)
7849 return (__m128i
) __builtin_ia32_pmovsqw128_mask ((__v2di
) __A
,
7850 (__v8hi
) _mm_setzero_si128 (),
7854 static __inline__
void __DEFAULT_FN_ATTRS
7855 _mm_mask_cvtsepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7857 __builtin_ia32_pmovsqw128mem_mask ((__v8hi
*) __P
, (__v2di
) __A
, __M
);
7860 static __inline__ __m128i __DEFAULT_FN_ATTRS
7861 _mm256_cvtsepi64_epi16 (__m256i __A
)
7863 return (__m128i
) __builtin_ia32_pmovsqw256_mask ((__v4di
) __A
,
7864 (__v8hi
)_mm_undefined_si128(),
7868 static __inline__ __m128i __DEFAULT_FN_ATTRS
7869 _mm256_mask_cvtsepi64_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7871 return (__m128i
) __builtin_ia32_pmovsqw256_mask ((__v4di
) __A
,
7875 static __inline__ __m128i __DEFAULT_FN_ATTRS
7876 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M
, __m256i __A
)
7878 return (__m128i
) __builtin_ia32_pmovsqw256_mask ((__v4di
) __A
,
7879 (__v8hi
) _mm_setzero_si128 (),
7883 static __inline__
void __DEFAULT_FN_ATTRS
7884 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7886 __builtin_ia32_pmovsqw256mem_mask ((__v8hi
*) __P
, (__v4di
) __A
, __M
);
7889 static __inline__ __m128i __DEFAULT_FN_ATTRS
7890 _mm_cvtusepi32_epi8 (__m128i __A
)
7892 return (__m128i
) __builtin_ia32_pmovusdb128_mask ((__v4si
) __A
,
7893 (__v16qi
)_mm_undefined_si128(),
7897 static __inline__ __m128i __DEFAULT_FN_ATTRS
7898 _mm_mask_cvtusepi32_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7900 return (__m128i
) __builtin_ia32_pmovusdb128_mask ((__v4si
) __A
,
7905 static __inline__ __m128i __DEFAULT_FN_ATTRS
7906 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M
, __m128i __A
)
7908 return (__m128i
) __builtin_ia32_pmovusdb128_mask ((__v4si
) __A
,
7909 (__v16qi
) _mm_setzero_si128 (),
7913 static __inline__
void __DEFAULT_FN_ATTRS
7914 _mm_mask_cvtusepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7916 __builtin_ia32_pmovusdb128mem_mask ((__v16qi
*) __P
, (__v4si
) __A
, __M
);
7919 static __inline__ __m128i __DEFAULT_FN_ATTRS
7920 _mm256_cvtusepi32_epi8 (__m256i __A
)
7922 return (__m128i
) __builtin_ia32_pmovusdb256_mask ((__v8si
) __A
,
7923 (__v16qi
)_mm_undefined_si128(),
7927 static __inline__ __m128i __DEFAULT_FN_ATTRS
7928 _mm256_mask_cvtusepi32_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7930 return (__m128i
) __builtin_ia32_pmovusdb256_mask ((__v8si
) __A
,
7935 static __inline__ __m128i __DEFAULT_FN_ATTRS
7936 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M
, __m256i __A
)
7938 return (__m128i
) __builtin_ia32_pmovusdb256_mask ((__v8si
) __A
,
7939 (__v16qi
) _mm_setzero_si128 (),
7943 static __inline__
void __DEFAULT_FN_ATTRS
7944 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7946 __builtin_ia32_pmovusdb256mem_mask ((__v16qi
*) __P
, (__v8si
) __A
, __M
);
7949 static __inline__ __m128i __DEFAULT_FN_ATTRS
7950 _mm_cvtusepi32_epi16 (__m128i __A
)
7952 return (__m128i
) __builtin_ia32_pmovusdw128_mask ((__v4si
) __A
,
7953 (__v8hi
)_mm_undefined_si128(),
7957 static __inline__ __m128i __DEFAULT_FN_ATTRS
7958 _mm_mask_cvtusepi32_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7960 return (__m128i
) __builtin_ia32_pmovusdw128_mask ((__v4si
) __A
,
7964 static __inline__ __m128i __DEFAULT_FN_ATTRS
7965 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M
, __m128i __A
)
7967 return (__m128i
) __builtin_ia32_pmovusdw128_mask ((__v4si
) __A
,
7968 (__v8hi
) _mm_setzero_si128 (),
7972 static __inline__
void __DEFAULT_FN_ATTRS
7973 _mm_mask_cvtusepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7975 __builtin_ia32_pmovusdw128mem_mask ((__v8hi
*) __P
, (__v4si
) __A
, __M
);
7978 static __inline__ __m128i __DEFAULT_FN_ATTRS
7979 _mm256_cvtusepi32_epi16 (__m256i __A
)
7981 return (__m128i
) __builtin_ia32_pmovusdw256_mask ((__v8si
) __A
,
7982 (__v8hi
) _mm_undefined_si128(),
7986 static __inline__ __m128i __DEFAULT_FN_ATTRS
7987 _mm256_mask_cvtusepi32_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7989 return (__m128i
) __builtin_ia32_pmovusdw256_mask ((__v8si
) __A
,
7993 static __inline__ __m128i __DEFAULT_FN_ATTRS
7994 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M
, __m256i __A
)
7996 return (__m128i
) __builtin_ia32_pmovusdw256_mask ((__v8si
) __A
,
7997 (__v8hi
) _mm_setzero_si128 (),
8001 static __inline__
void __DEFAULT_FN_ATTRS
8002 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
8004 __builtin_ia32_pmovusdw256mem_mask ((__v8hi
*) __P
, (__v8si
) __A
, __M
);
8007 static __inline__ __m128i __DEFAULT_FN_ATTRS
8008 _mm_cvtusepi64_epi8 (__m128i __A
)
8010 return (__m128i
) __builtin_ia32_pmovusqb128_mask ((__v2di
) __A
,
8011 (__v16qi
)_mm_undefined_si128(),
8015 static __inline__ __m128i __DEFAULT_FN_ATTRS
8016 _mm_mask_cvtusepi64_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8018 return (__m128i
) __builtin_ia32_pmovusqb128_mask ((__v2di
) __A
,
8023 static __inline__ __m128i __DEFAULT_FN_ATTRS
8024 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M
, __m128i __A
)
8026 return (__m128i
) __builtin_ia32_pmovusqb128_mask ((__v2di
) __A
,
8027 (__v16qi
) _mm_setzero_si128 (),
8031 static __inline__
void __DEFAULT_FN_ATTRS
8032 _mm_mask_cvtusepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
8034 __builtin_ia32_pmovusqb128mem_mask ((__v16qi
*) __P
, (__v2di
) __A
, __M
);
8037 static __inline__ __m128i __DEFAULT_FN_ATTRS
8038 _mm256_cvtusepi64_epi8 (__m256i __A
)
8040 return (__m128i
) __builtin_ia32_pmovusqb256_mask ((__v4di
) __A
,
8041 (__v16qi
)_mm_undefined_si128(),
8045 static __inline__ __m128i __DEFAULT_FN_ATTRS
8046 _mm256_mask_cvtusepi64_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8048 return (__m128i
) __builtin_ia32_pmovusqb256_mask ((__v4di
) __A
,
8053 static __inline__ __m128i __DEFAULT_FN_ATTRS
8054 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M
, __m256i __A
)
8056 return (__m128i
) __builtin_ia32_pmovusqb256_mask ((__v4di
) __A
,
8057 (__v16qi
) _mm_setzero_si128 (),
8061 static __inline__
void __DEFAULT_FN_ATTRS
8062 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
8064 __builtin_ia32_pmovusqb256mem_mask ((__v16qi
*) __P
, (__v4di
) __A
, __M
);
8067 static __inline__ __m128i __DEFAULT_FN_ATTRS
8068 _mm_cvtusepi64_epi32 (__m128i __A
)
8070 return (__m128i
) __builtin_ia32_pmovusqd128_mask ((__v2di
) __A
,
8071 (__v4si
)_mm_undefined_si128(),
8075 static __inline__ __m128i __DEFAULT_FN_ATTRS
8076 _mm_mask_cvtusepi64_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8078 return (__m128i
) __builtin_ia32_pmovusqd128_mask ((__v2di
) __A
,
8082 static __inline__ __m128i __DEFAULT_FN_ATTRS
8083 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M
, __m128i __A
)
8085 return (__m128i
) __builtin_ia32_pmovusqd128_mask ((__v2di
) __A
,
8086 (__v4si
) _mm_setzero_si128 (),
8090 static __inline__
void __DEFAULT_FN_ATTRS
8091 _mm_mask_cvtusepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m128i __A
)
8093 __builtin_ia32_pmovusqd128mem_mask ((__v4si
*) __P
, (__v2di
) __A
, __M
);
8096 static __inline__ __m128i __DEFAULT_FN_ATTRS
8097 _mm256_cvtusepi64_epi32 (__m256i __A
)
8099 return (__m128i
) __builtin_ia32_pmovusqd256_mask ((__v4di
) __A
,
8100 (__v4si
)_mm_undefined_si128(),
8104 static __inline__ __m128i __DEFAULT_FN_ATTRS
8105 _mm256_mask_cvtusepi64_epi32 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8107 return (__m128i
) __builtin_ia32_pmovusqd256_mask ((__v4di
) __A
,
8111 static __inline__ __m128i __DEFAULT_FN_ATTRS
8112 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M
, __m256i __A
)
8114 return (__m128i
) __builtin_ia32_pmovusqd256_mask ((__v4di
) __A
,
8115 (__v4si
) _mm_setzero_si128 (),
8119 static __inline__
void __DEFAULT_FN_ATTRS
8120 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m256i __A
)
8122 __builtin_ia32_pmovusqd256mem_mask ((__v4si
*) __P
, (__v4di
) __A
, __M
);
8125 static __inline__ __m128i __DEFAULT_FN_ATTRS
8126 _mm_cvtusepi64_epi16 (__m128i __A
)
8128 return (__m128i
) __builtin_ia32_pmovusqw128_mask ((__v2di
) __A
,
8129 (__v8hi
)_mm_undefined_si128(),
8133 static __inline__ __m128i __DEFAULT_FN_ATTRS
8134 _mm_mask_cvtusepi64_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8136 return (__m128i
) __builtin_ia32_pmovusqw128_mask ((__v2di
) __A
,
8140 static __inline__ __m128i __DEFAULT_FN_ATTRS
8141 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M
, __m128i __A
)
8143 return (__m128i
) __builtin_ia32_pmovusqw128_mask ((__v2di
) __A
,
8144 (__v8hi
) _mm_setzero_si128 (),
8148 static __inline__
void __DEFAULT_FN_ATTRS
8149 _mm_mask_cvtusepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
8151 __builtin_ia32_pmovusqw128mem_mask ((__v8hi
*) __P
, (__v2di
) __A
, __M
);
8154 static __inline__ __m128i __DEFAULT_FN_ATTRS
8155 _mm256_cvtusepi64_epi16 (__m256i __A
)
8157 return (__m128i
) __builtin_ia32_pmovusqw256_mask ((__v4di
) __A
,
8158 (__v8hi
)_mm_undefined_si128(),
8162 static __inline__ __m128i __DEFAULT_FN_ATTRS
8163 _mm256_mask_cvtusepi64_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8165 return (__m128i
) __builtin_ia32_pmovusqw256_mask ((__v4di
) __A
,
8169 static __inline__ __m128i __DEFAULT_FN_ATTRS
8170 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M
, __m256i __A
)
8172 return (__m128i
) __builtin_ia32_pmovusqw256_mask ((__v4di
) __A
,
8173 (__v8hi
) _mm_setzero_si128 (),
8177 static __inline__
void __DEFAULT_FN_ATTRS
8178 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
8180 return __builtin_ia32_pmovusqw256mem_mask ((__v8hi
*) __P
, (__v4di
) __A
, __M
);
8183 static __inline__ __m128i __DEFAULT_FN_ATTRS
8184 _mm_cvtepi32_epi8 (__m128i __A
)
8186 return (__m128i
) __builtin_ia32_pmovdb128_mask ((__v4si
) __A
,
8187 (__v16qi
)_mm_undefined_si128(),
8191 static __inline__ __m128i __DEFAULT_FN_ATTRS
8192 _mm_mask_cvtepi32_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8194 return (__m128i
) __builtin_ia32_pmovdb128_mask ((__v4si
) __A
,
8195 (__v16qi
) __O
, __M
);
8198 static __inline__ __m128i __DEFAULT_FN_ATTRS
8199 _mm_maskz_cvtepi32_epi8 (__mmask8 __M
, __m128i __A
)
8201 return (__m128i
) __builtin_ia32_pmovdb128_mask ((__v4si
) __A
,
8203 _mm_setzero_si128 (),
8207 static __inline__
void __DEFAULT_FN_ATTRS
8208 _mm_mask_cvtepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
8210 __builtin_ia32_pmovdb128mem_mask ((__v16qi
*) __P
, (__v4si
) __A
, __M
);
8213 static __inline__ __m128i __DEFAULT_FN_ATTRS
8214 _mm256_cvtepi32_epi8 (__m256i __A
)
8216 return (__m128i
) __builtin_ia32_pmovdb256_mask ((__v8si
) __A
,
8217 (__v16qi
)_mm_undefined_si128(),
8221 static __inline__ __m128i __DEFAULT_FN_ATTRS
8222 _mm256_mask_cvtepi32_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8224 return (__m128i
) __builtin_ia32_pmovdb256_mask ((__v8si
) __A
,
8225 (__v16qi
) __O
, __M
);
8228 static __inline__ __m128i __DEFAULT_FN_ATTRS
8229 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M
, __m256i __A
)
8231 return (__m128i
) __builtin_ia32_pmovdb256_mask ((__v8si
) __A
,
8232 (__v16qi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v16qi *), source __A
   (as __v8si) and mask __M to __builtin_ia32_pmovdb256mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8236 static __inline__
void __DEFAULT_FN_ATTRS
8237 _mm256_mask_cvtepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
8239 __builtin_ia32_pmovdb256mem_mask ((__v16qi
*) __P
, (__v8si
) __A
, __M
);
/* Calls __builtin_ia32_pmovdw128_mask on __A (as __v4si) with a zeroed
   __v8hi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8242 static __inline__ __m128i __DEFAULT_FN_ATTRS
8243 _mm_cvtepi32_epi16 (__m128i __A
)
8245 return (__m128i
) __builtin_ia32_pmovdw128_mask ((__v4si
) __A
,
8246 (__v8hi
) _mm_setzero_si128 (),
/* Calls __builtin_ia32_pmovdw128_mask on __A (as __v4si).
   NOTE(review): the pass-through and mask arguments (presumably __O and __M)
   and the braces are not visible in this listing. */
8250 static __inline__ __m128i __DEFAULT_FN_ATTRS
8251 _mm_mask_cvtepi32_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8253 return (__m128i
) __builtin_ia32_pmovdw128_mask ((__v4si
) __A
,
/* Calls __builtin_ia32_pmovdw128_mask on __A (as __v4si) with a zeroed
   __v8hi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8257 static __inline__ __m128i __DEFAULT_FN_ATTRS
8258 _mm_maskz_cvtepi32_epi16 (__mmask8 __M
, __m128i __A
)
8260 return (__m128i
) __builtin_ia32_pmovdw128_mask ((__v4si
) __A
,
8261 (__v8hi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v8hi *), source __A
   (as __v4si) and mask __M to __builtin_ia32_pmovdw128mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8265 static __inline__
void __DEFAULT_FN_ATTRS
8266 _mm_mask_cvtepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
8268 __builtin_ia32_pmovdw128mem_mask ((__v8hi
*) __P
, (__v4si
) __A
, __M
);
/* Calls __builtin_ia32_pmovdw256_mask on __A (as __v8si) with a zeroed
   __v8hi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8271 static __inline__ __m128i __DEFAULT_FN_ATTRS
8272 _mm256_cvtepi32_epi16 (__m256i __A
)
8274 return (__m128i
) __builtin_ia32_pmovdw256_mask ((__v8si
) __A
,
8275 (__v8hi
)_mm_setzero_si128 (),
/* Calls __builtin_ia32_pmovdw256_mask on __A (as __v8si).
   NOTE(review): the pass-through and mask arguments (presumably __O and __M)
   and the braces are not visible in this listing. */
8279 static __inline__ __m128i __DEFAULT_FN_ATTRS
8280 _mm256_mask_cvtepi32_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8282 return (__m128i
) __builtin_ia32_pmovdw256_mask ((__v8si
) __A
,
/* Calls __builtin_ia32_pmovdw256_mask on __A (as __v8si) with a zeroed
   __v8hi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8286 static __inline__ __m128i __DEFAULT_FN_ATTRS
8287 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M
, __m256i __A
)
8289 return (__m128i
) __builtin_ia32_pmovdw256_mask ((__v8si
) __A
,
8290 (__v8hi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v8hi *), source __A
   (as __v8si) and mask __M to __builtin_ia32_pmovdw256mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8294 static __inline__
void __DEFAULT_FN_ATTRS
8295 _mm256_mask_cvtepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
8297 __builtin_ia32_pmovdw256mem_mask ((__v8hi
*) __P
, (__v8si
) __A
, __M
);
/* Calls __builtin_ia32_pmovqb128_mask on __A (as __v2di) with an
   _mm_undefined_si128() pass-through (as __v16qi).
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8300 static __inline__ __m128i __DEFAULT_FN_ATTRS
8301 _mm_cvtepi64_epi8 (__m128i __A
)
8303 return (__m128i
) __builtin_ia32_pmovqb128_mask ((__v2di
) __A
,
8304 (__v16qi
) _mm_undefined_si128(),
/* Calls __builtin_ia32_pmovqb128_mask on __A (as __v2di) with pass-through
   __O (as __v16qi) and mask __M.
   NOTE(review): the function braces are not visible in this listing. */
8308 static __inline__ __m128i __DEFAULT_FN_ATTRS
8309 _mm_mask_cvtepi64_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8311 return (__m128i
) __builtin_ia32_pmovqb128_mask ((__v2di
) __A
,
8312 (__v16qi
) __O
, __M
);
/* Calls __builtin_ia32_pmovqb128_mask on __A (as __v2di) with a zeroed
   __v16qi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8315 static __inline__ __m128i __DEFAULT_FN_ATTRS
8316 _mm_maskz_cvtepi64_epi8 (__mmask8 __M
, __m128i __A
)
8318 return (__m128i
) __builtin_ia32_pmovqb128_mask ((__v2di
) __A
,
8319 (__v16qi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v16qi *), source __A
   (as __v2di) and mask __M to __builtin_ia32_pmovqb128mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8323 static __inline__
void __DEFAULT_FN_ATTRS
8324 _mm_mask_cvtepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
8326 __builtin_ia32_pmovqb128mem_mask ((__v16qi
*) __P
, (__v2di
) __A
, __M
);
/* Calls __builtin_ia32_pmovqb256_mask on __A (as __v4di) with an
   _mm_undefined_si128() pass-through (as __v16qi).
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8329 static __inline__ __m128i __DEFAULT_FN_ATTRS
8330 _mm256_cvtepi64_epi8 (__m256i __A
)
8332 return (__m128i
) __builtin_ia32_pmovqb256_mask ((__v4di
) __A
,
8333 (__v16qi
) _mm_undefined_si128(),
/* Calls __builtin_ia32_pmovqb256_mask on __A (as __v4di) with pass-through
   __O (as __v16qi) and mask __M.
   NOTE(review): the function braces are not visible in this listing. */
8337 static __inline__ __m128i __DEFAULT_FN_ATTRS
8338 _mm256_mask_cvtepi64_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8340 return (__m128i
) __builtin_ia32_pmovqb256_mask ((__v4di
) __A
,
8341 (__v16qi
) __O
, __M
);
/* Calls __builtin_ia32_pmovqb256_mask on __A (as __v4di) with a zeroed
   __v16qi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8344 static __inline__ __m128i __DEFAULT_FN_ATTRS
8345 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M
, __m256i __A
)
8347 return (__m128i
) __builtin_ia32_pmovqb256_mask ((__v4di
) __A
,
8348 (__v16qi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v16qi *), source __A
   (as __v4di) and mask __M to __builtin_ia32_pmovqb256mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8352 static __inline__
void __DEFAULT_FN_ATTRS
8353 _mm256_mask_cvtepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
8355 __builtin_ia32_pmovqb256mem_mask ((__v16qi
*) __P
, (__v4di
) __A
, __M
);
/* Calls __builtin_ia32_pmovqd128_mask on __A (as __v2di) with an
   _mm_undefined_si128() pass-through (as __v4si).
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8358 static __inline__ __m128i __DEFAULT_FN_ATTRS
8359 _mm_cvtepi64_epi32 (__m128i __A
)
8361 return (__m128i
) __builtin_ia32_pmovqd128_mask ((__v2di
) __A
,
8362 (__v4si
)_mm_undefined_si128(),
/* Calls __builtin_ia32_pmovqd128_mask on __A (as __v2di).
   NOTE(review): the pass-through and mask arguments (presumably __O and __M)
   and the braces are not visible in this listing. */
8366 static __inline__ __m128i __DEFAULT_FN_ATTRS
8367 _mm_mask_cvtepi64_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8369 return (__m128i
) __builtin_ia32_pmovqd128_mask ((__v2di
) __A
,
/* Calls __builtin_ia32_pmovqd128_mask on __A (as __v2di) with a zeroed
   __v4si pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8373 static __inline__ __m128i __DEFAULT_FN_ATTRS
8374 _mm_maskz_cvtepi64_epi32 (__mmask8 __M
, __m128i __A
)
8376 return (__m128i
) __builtin_ia32_pmovqd128_mask ((__v2di
) __A
,
8377 (__v4si
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v4si *), source __A
   (as __v2di) and mask __M to __builtin_ia32_pmovqd128mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8381 static __inline__
void __DEFAULT_FN_ATTRS
8382 _mm_mask_cvtepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m128i __A
)
8384 __builtin_ia32_pmovqd128mem_mask ((__v4si
*) __P
, (__v2di
) __A
, __M
);
/* Calls __builtin_ia32_pmovqd256_mask on __A (as __v4di) with an
   _mm_undefined_si128() pass-through (as __v4si).
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8387 static __inline__ __m128i __DEFAULT_FN_ATTRS
8388 _mm256_cvtepi64_epi32 (__m256i __A
)
8390 return (__m128i
) __builtin_ia32_pmovqd256_mask ((__v4di
) __A
,
8391 (__v4si
) _mm_undefined_si128(),
/* Calls __builtin_ia32_pmovqd256_mask on __A (as __v4di).
   NOTE(review): the pass-through and mask arguments (presumably __O and __M)
   and the braces are not visible in this listing. */
8395 static __inline__ __m128i __DEFAULT_FN_ATTRS
8396 _mm256_mask_cvtepi64_epi32 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8398 return (__m128i
) __builtin_ia32_pmovqd256_mask ((__v4di
) __A
,
/* Calls __builtin_ia32_pmovqd256_mask on __A (as __v4di) with a zeroed
   __v4si pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8402 static __inline__ __m128i __DEFAULT_FN_ATTRS
8403 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M
, __m256i __A
)
8405 return (__m128i
) __builtin_ia32_pmovqd256_mask ((__v4di
) __A
,
8406 (__v4si
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v4si *), source __A
   (as __v4di) and mask __M to __builtin_ia32_pmovqd256mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8410 static __inline__
void __DEFAULT_FN_ATTRS
8411 _mm256_mask_cvtepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m256i __A
)
8413 __builtin_ia32_pmovqd256mem_mask ((__v4si
*) __P
, (__v4di
) __A
, __M
);
/* Calls __builtin_ia32_pmovqw128_mask on __A (as __v2di) with an
   _mm_undefined_si128() pass-through (as __v8hi).
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8416 static __inline__ __m128i __DEFAULT_FN_ATTRS
8417 _mm_cvtepi64_epi16 (__m128i __A
)
8419 return (__m128i
) __builtin_ia32_pmovqw128_mask ((__v2di
) __A
,
8420 (__v8hi
) _mm_undefined_si128(),
/* Calls __builtin_ia32_pmovqw128_mask on __A (as __v2di).
   NOTE(review): the pass-through and mask arguments (presumably __O and __M)
   and the braces are not visible in this listing. */
8424 static __inline__ __m128i __DEFAULT_FN_ATTRS
8425 _mm_mask_cvtepi64_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
8427 return (__m128i
) __builtin_ia32_pmovqw128_mask ((__v2di
) __A
,
/* Calls __builtin_ia32_pmovqw128_mask on __A (as __v2di) with a zeroed
   __v8hi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8432 static __inline__ __m128i __DEFAULT_FN_ATTRS
8433 _mm_maskz_cvtepi64_epi16 (__mmask8 __M
, __m128i __A
)
8435 return (__m128i
) __builtin_ia32_pmovqw128_mask ((__v2di
) __A
,
8436 (__v8hi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v8hi *), source __A
   (as __v2di) and mask __M to __builtin_ia32_pmovqw128mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8440 static __inline__
void __DEFAULT_FN_ATTRS
8441 _mm_mask_cvtepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
8443 __builtin_ia32_pmovqw128mem_mask ((__v8hi
*) __P
, (__v2di
) __A
, __M
);
/* Calls __builtin_ia32_pmovqw256_mask on __A (as __v4di) with an
   _mm_undefined_si128() pass-through (as __v8hi).
   NOTE(review): the mask argument and the braces are not visible in this listing. */
8446 static __inline__ __m128i __DEFAULT_FN_ATTRS
8447 _mm256_cvtepi64_epi16 (__m256i __A
)
8449 return (__m128i
) __builtin_ia32_pmovqw256_mask ((__v4di
) __A
,
8450 (__v8hi
)_mm_undefined_si128(),
/* Calls __builtin_ia32_pmovqw256_mask on __A (as __v4di).
   NOTE(review): the pass-through and mask arguments (presumably __O and __M)
   and the braces are not visible in this listing. */
8454 static __inline__ __m128i __DEFAULT_FN_ATTRS
8455 _mm256_mask_cvtepi64_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
8457 return (__m128i
) __builtin_ia32_pmovqw256_mask ((__v4di
) __A
,
/* Calls __builtin_ia32_pmovqw256_mask on __A (as __v4di) with a zeroed
   __v8hi pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __M) and the braces are not
   visible in this listing. */
8461 static __inline__ __m128i __DEFAULT_FN_ATTRS
8462 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M
, __m256i __A
)
8464 return (__m128i
) __builtin_ia32_pmovqw256_mask ((__v4di
) __A
,
8465 (__v8hi
) _mm_setzero_si128 (),
/* Store wrapper: forwards destination __P (as __v8hi *), source __A
   (as __v4di) and mask __M to __builtin_ia32_pmovqw256mem_mask.
   NOTE(review): the function braces are not visible in this listing. */
8469 static __inline__
void __DEFAULT_FN_ATTRS
8470 _mm256_mask_cvtepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
8472 __builtin_ia32_pmovqw256mem_mask ((__v8hi
*) __P
, (__v4di
) __A
, __M
);
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) with a
   zeroed __v4sf pass-through.
   NOTE(review): the imm and mask argument lines are not visible in this listing. */
8475 #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
8476 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
8478 (__v4sf)_mm_setzero_ps(), \
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) with W
   (as __v4sf) as pass-through.
   NOTE(review): the imm and mask (presumably U) lines are not visible in this listing. */
8481 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
8482 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
8484 (__v4sf)(__m128)(W), \
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) with a
   zeroed __v4sf pass-through.
   NOTE(review): the imm and mask (presumably U) lines are not visible in this listing. */
8487 #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
8488 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
8490 (__v4sf)_mm_setzero_ps(), \
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) with a
   zeroed __v4si pass-through.
   NOTE(review): the imm and mask argument lines are not visible in this listing. */
8493 #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
8494 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
8496 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) with W
   (as __v4si) as pass-through.
   NOTE(review): the imm and mask (presumably U) lines are not visible in this listing. */
8499 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
8500 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
8502 (__v4si)(__m128i)(W), \
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) with a
   zeroed __v4si pass-through.
   NOTE(review): the imm and mask (presumably U) lines are not visible in this listing. */
8505 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
8506 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
8508 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_insertf32x4_256_mask(A as __v8sf, B as __v4sf,
   imm as int, zeroed __v8sf pass-through, ...).
   NOTE(review): the mask argument line is not visible in this listing. */
8511 #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
8512 (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
8513 (__v4sf)(__m128)(B), (int)(imm), \
8514 (__v8sf)_mm256_setzero_ps(), \
/* Expands to __builtin_ia32_insertf32x4_256_mask(A as __v8sf, B as __v4sf,
   imm as int, W as __v8sf pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8517 #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
8518 (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
8519 (__v4sf)(__m128)(B), (int)(imm), \
8520 (__v8sf)(__m256)(W), \
/* Expands to __builtin_ia32_insertf32x4_256_mask(A as __v8sf, B as __v4sf,
   imm as int, zeroed __v8sf pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8523 #define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
8524 (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
8525 (__v4sf)(__m128)(B), (int)(imm), \
8526 (__v8sf)_mm256_setzero_ps(), \
/* Expands to __builtin_ia32_inserti32x4_256_mask on A (as __v8si) and B
   (as __v4si) with a zeroed __v8si pass-through.
   NOTE(review): the imm and mask argument lines are not visible in this listing. */
8529 #define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
8530 (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
8531 (__v4si)(__m128i)(B), \
8533 (__v8si)_mm256_setzero_si256(), \
/* Expands to __builtin_ia32_inserti32x4_256_mask on A (as __v8si) and B
   (as __v4si) with W (as __v8si) as pass-through.
   NOTE(review): the imm and mask (presumably U) lines are not visible in this listing. */
8536 #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
8537 (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
8538 (__v4si)(__m128i)(B), \
8540 (__v8si)(__m256i)(W), \
/* Expands to __builtin_ia32_inserti32x4_256_mask on A (as __v8si) and B
   (as __v4si) with a zeroed __v8si pass-through.
   NOTE(review): the imm and mask (presumably U) lines are not visible in this listing. */
8543 #define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
8544 (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
8545 (__v4si)(__m128i)(B), \
8547 (__v8si)_mm256_setzero_si256(), \
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v2df.
   NOTE(review): the mask argument line is not visible in this listing. */
8550 #define _mm_getmant_pd(A, B, C) __extension__({\
8551 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
8552 (int)(((C)<<2) | (B)), \
8553 (__v2df)_mm_setzero_pd(), \
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df); the integer
   control operand is built as (C << 2) | B; pass-through is W (as __v2df).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8556 #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
8557 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
8558 (int)(((C)<<2) | (B)), \
8559 (__v2df)(__m128d)(W), \
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v2df.
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8562 #define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
8563 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
8564 (int)(((C)<<2) | (B)), \
8565 (__v2df)_mm_setzero_pd(), \
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v4df.
   NOTE(review): the mask argument line is not visible in this listing. */
8568 #define _mm256_getmant_pd(A, B, C) __extension__ ({ \
8569 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
8570 (int)(((C)<<2) | (B)), \
8571 (__v4df)_mm256_setzero_pd(), \
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df); the integer
   control operand is built as (C << 2) | B; pass-through is W (as __v4df).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8574 #define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
8575 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
8576 (int)(((C)<<2) | (B)), \
8577 (__v4df)(__m256d)(W), \
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v4df.
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8580 #define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
8581 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
8582 (int)(((C)<<2) | (B)), \
8583 (__v4df)_mm256_setzero_pd(), \
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v4sf.
   NOTE(review): the mask argument line is not visible in this listing. */
8586 #define _mm_getmant_ps(A, B, C) __extension__ ({ \
8587 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
8588 (int)(((C)<<2) | (B)), \
8589 (__v4sf)_mm_setzero_ps(), \
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf); the integer
   control operand is built as (C << 2) | B; pass-through is W (as __v4sf).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8592 #define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
8593 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
8594 (int)(((C)<<2) | (B)), \
8595 (__v4sf)(__m128)(W), \
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v4sf.
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8598 #define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
8599 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
8600 (int)(((C)<<2) | (B)), \
8601 (__v4sf)_mm_setzero_ps(), \
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v8sf.
   NOTE(review): the mask argument line is not visible in this listing. */
8604 #define _mm256_getmant_ps(A, B, C) __extension__ ({ \
8605 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
8606 (int)(((C)<<2) | (B)), \
8607 (__v8sf)_mm256_setzero_ps(), \
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf); the integer
   control operand is built as (C << 2) | B; pass-through is W (as __v8sf).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8610 #define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
8611 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
8612 (int)(((C)<<2) | (B)), \
8613 (__v8sf)(__m256)(W), \
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf); the integer
   control operand is built as (C << 2) | B; pass-through is a zeroed __v8sf.
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8616 #define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
8617 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
8618 (int)(((C)<<2) | (B)), \
8619 (__v8sf)_mm256_setzero_ps(), \
/* Expands to __builtin_ia32_gather3div2df with v1_old as __v2df, addr as
   double const *, index as __v2di, mask as __mmask8 and scale as int.
   Result is cast to __m128d. */
8622 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8623 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
8624 (double const *)(addr), \
8625 (__v2di)(__m128i)(index), \
8626 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div2di with v1_old as __v2di, addr as
   long long const *, index as __v2di, mask as __mmask8 and scale as int.
   Result is cast to __m128i. */
8628 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8629 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
8630 (long long const *)(addr), \
8631 (__v2di)(__m128i)(index), \
8632 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div4df with v1_old as __v4df, addr as
   double const *, index as __v4di, mask as __mmask8 and scale as int.
   Result is cast to __m256d. */
8634 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8635 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
8636 (double const *)(addr), \
8637 (__v4di)(__m256i)(index), \
8638 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div4di with v1_old as __v4di, addr as
   long long const *, index as __v4di, mask as __mmask8 and scale as int.
   Result is cast to __m256i. */
8640 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8641 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
8642 (long long const *)(addr), \
8643 (__v4di)(__m256i)(index), \
8644 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div4sf with v1_old as __v4sf, addr as
   float const *, index as __v2di, mask as __mmask8 and scale as int.
   Result is cast to __m128. */
8646 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8647 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
8648 (float const *)(addr), \
8649 (__v2di)(__m128i)(index), \
8650 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div4si with v1_old as __v4si, addr as
   int const *, index as __v2di, mask as __mmask8 and scale as int.
   Result is cast to __m128i. */
8652 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8653 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
8654 (int const *)(addr), \
8655 (__v2di)(__m128i)(index), \
8656 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div8sf with addr as float const *, index
   as __v4di, mask as __mmask8 and scale as int.
   Despite the _mm256 prefix, v1_old and the result are 128-bit here
   (__v4sf / __m128); only the index vector is 256-bit. */
8658 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8659 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
8660 (float const *)(addr), \
8661 (__v4di)(__m256i)(index), \
8662 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3div8si with addr as int const *, index
   as __v4di, mask as __mmask8 and scale as int.
   Despite the _mm256 prefix, v1_old and the result are 128-bit here
   (__v4si / __m128i); only the index vector is 256-bit. */
8664 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8665 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
8666 (int const *)(addr), \
8667 (__v4di)(__m256i)(index), \
8668 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv2df with v1_old as __v2df, addr as
   double const *, index as __v4si, mask as __mmask8 and scale as int.
   Result is cast to __m128d. */
8670 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8671 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
8672 (double const *)(addr), \
8673 (__v4si)(__m128i)(index), \
8674 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv2di with v1_old as __v2di, addr as
   long long const *, index as __v4si, mask as __mmask8 and scale as int.
   Result is cast to __m128i. */
8676 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8677 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
8678 (long long const *)(addr), \
8679 (__v4si)(__m128i)(index), \
8680 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv4df with v1_old as __v4df, addr as
   double const *, mask as __mmask8 and scale as int. The index operand is
   a 128-bit __v4si even though the result is __m256d. */
8682 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8683 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
8684 (double const *)(addr), \
8685 (__v4si)(__m128i)(index), \
8686 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv4di with v1_old as __v4di, addr as
   long long const *, mask as __mmask8 and scale as int. The index operand
   is a 128-bit __v4si even though the result is __m256i. */
8688 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8689 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
8690 (long long const *)(addr), \
8691 (__v4si)(__m128i)(index), \
8692 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv4sf with v1_old as __v4sf, addr as
   float const *, index as __v4si, mask as __mmask8 and scale as int.
   Result is cast to __m128. */
8694 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8695 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
8696 (float const *)(addr), \
8697 (__v4si)(__m128i)(index), \
8698 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv4si with v1_old as __v4si, addr as
   int const *, index as __v4si, mask as __mmask8 and scale as int.
   Result is cast to __m128i. */
8700 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8701 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
8702 (int const *)(addr), \
8703 (__v4si)(__m128i)(index), \
8704 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv8sf with v1_old as __v8sf, addr as
   float const *, index as __v8si, mask as __mmask8 and scale as int.
   Result is cast to __m256. */
8706 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8707 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
8708 (float const *)(addr), \
8709 (__v8si)(__m256i)(index), \
8710 (__mmask8)(mask), (int)(scale)); })
/* Expands to __builtin_ia32_gather3siv8si with v1_old as __v8si, addr as
   int const *, index as __v8si, mask as __mmask8 and scale as int.
   Result is cast to __m256i. */
8712 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8713 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
8714 (int const *)(addr), \
8715 (__v8si)(__m256i)(index), \
8716 (__mmask8)(mask), (int)(scale)); })
/* Shuffles the four double lanes of X with __builtin_shufflevector. Each
   result lane index is one 2-bit field of C (bits 1:0, 3:2, 5:4, 7:6).
   Every index is masked to 0..3, so only the first operand (X) is selected
   from; the second operand is an undefined placeholder. */
8718 #define _mm256_permutex_pd(X, C) __extension__ ({ \
8719 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
8720 (__v4df)_mm256_undefined_pd(), \
8721 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
8722 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
/* Passes mask U, the result of _mm256_permutex_pd(X, C) and W (as __v4df)
   to __builtin_ia32_selectpd_256, in that order. */
8724 #define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
8725 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8726 (__v4df)_mm256_permutex_pd((X), (C)), \
8727 (__v4df)(__m256d)(W)); })
/* Passes mask U, the result of _mm256_permutex_pd(X, C) and a zero vector
   from _mm256_setzero_pd() to __builtin_ia32_selectpd_256, in that order. */
8729 #define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
8730 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8731 (__v4df)_mm256_permutex_pd((X), (C)), \
8732 (__v4df)_mm256_setzero_pd()); })
/* Shuffles the four 64-bit lanes of X with __builtin_shufflevector. Each
   result lane index is one 2-bit field of C (bits 1:0, 3:2, 5:4, 7:6).
   Every index is masked to 0..3, so only the first operand (X) is selected
   from; the second operand is an undefined placeholder. */
8734 #define _mm256_permutex_epi64(X, C) __extension__ ({ \
8735 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
8736 (__v4di)_mm256_undefined_si256(), \
8737 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
8738 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
/* Passes mask U, the result of _mm256_permutex_epi64(X, C) and W (as
   __v4di) to __builtin_ia32_selectq_256, in that order. */
8740 #define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
8741 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8742 (__v4di)_mm256_permutex_epi64((X), (C)), \
8743 (__v4di)(__m256i)(W)); })
/* Passes mask U, the result of _mm256_permutex_epi64(X, C) and a zero
   vector from _mm256_setzero_si256() to __builtin_ia32_selectq_256, in that
   order. */
8745 #define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
8746 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8747 (__v4di)_mm256_permutex_epi64((X), (C)), \
8748 (__v4di)_mm256_setzero_si256()); })
/* Calls __builtin_ia32_permvardf256_mask with __Y (as __v4df) as the first
   operand and an undefined pass-through.
   NOTE(review): the pass-through is the integer _mm256_undefined_si256() cast
   to __v4df; _mm256_undefined_pd() would match the element type — confirm.
   NOTE(review): the index argument (presumably __X), the mask argument and
   the braces are not visible in this listing. */
8750 static __inline__ __m256d __DEFAULT_FN_ATTRS
8751 _mm256_permutexvar_pd (__m256i __X
, __m256d __Y
)
8753 return (__m256d
) __builtin_ia32_permvardf256_mask ((__v4df
) __Y
,
8755 (__v4df
) _mm256_undefined_si256 (),
/* Calls __builtin_ia32_permvardf256_mask with __Y (as __v4df) as the first
   operand.
   NOTE(review): the declaration of parameter __Y, the remaining call
   arguments and the braces are not visible in this listing. */
8759 static __inline__ __m256d __DEFAULT_FN_ATTRS
8760 _mm256_mask_permutexvar_pd (__m256d __W
, __mmask8 __U
, __m256i __X
,
8763 return (__m256d
) __builtin_ia32_permvardf256_mask ((__v4df
) __Y
,
/* Calls __builtin_ia32_permvardf256_mask with __Y (as __v4df) as the first
   operand and a zeroed __v4df pass-through from _mm256_setzero_pd().
   NOTE(review): the index argument (presumably __X), the mask argument
   (presumably __U) and the braces are not visible in this listing. */
8769 static __inline__ __m256d __DEFAULT_FN_ATTRS
8770 _mm256_maskz_permutexvar_pd (__mmask8 __U
, __m256i __X
, __m256d __Y
)
8772 return (__m256d
) __builtin_ia32_permvardf256_mask ((__v4df
) __Y
,
8774 (__v4df
) _mm256_setzero_pd (),
/* Calls __builtin_ia32_permvardi256_mask with __Y (as __v4di) as the first
   operand and a zeroed __v4di pass-through from _mm256_setzero_si256().
   NOTE(review): the index argument (presumably __X), the mask argument
   (presumably __M) and the braces are not visible in this listing. */
8778 static __inline__ __m256i __DEFAULT_FN_ATTRS
8779 _mm256_maskz_permutexvar_epi64 (__mmask8 __M
, __m256i __X
, __m256i __Y
)
8781 return (__m256i
) __builtin_ia32_permvardi256_mask ((__v4di
) __Y
,
8783 (__v4di
) _mm256_setzero_si256 (),
/* Calls __builtin_ia32_permvardi256_mask with __Y (as __v4di) as the first
   operand and an _mm256_undefined_si256() pass-through.
   NOTE(review): the index argument (presumably __X), the mask argument and
   the braces are not visible in this listing. */
8787 static __inline__ __m256i __DEFAULT_FN_ATTRS
8788 _mm256_permutexvar_epi64 ( __m256i __X
, __m256i __Y
)
8790 return (__m256i
) __builtin_ia32_permvardi256_mask ((__v4di
) __Y
,
8792 (__v4di
) _mm256_undefined_si256 (),
/* Calls __builtin_ia32_permvardi256_mask with __Y (as __v4di) as the first
   operand.
   NOTE(review): the declaration of parameter __Y, the remaining call
   arguments and the braces are not visible in this listing. */
8796 static __inline__ __m256i __DEFAULT_FN_ATTRS
8797 _mm256_mask_permutexvar_epi64 (__m256i __W
, __mmask8 __M
, __m256i __X
,
8800 return (__m256i
) __builtin_ia32_permvardi256_mask ((__v4di
) __Y
,
/* Calls __builtin_ia32_permvarsf256_mask with __Y (as __v8sf) as the first
   operand.
   NOTE(review): the declaration of parameter __Y, the remaining call
   arguments and the braces are not visible in this listing. */
8806 static __inline__ __m256 __DEFAULT_FN_ATTRS
8807 _mm256_mask_permutexvar_ps (__m256 __W
, __mmask8 __U
, __m256i __X
,
8810 return (__m256
) __builtin_ia32_permvarsf256_mask ((__v8sf
) __Y
,
/* Calls __builtin_ia32_permvarsf256_mask with __Y (as __v8sf) as the first
   operand and a zeroed __v8sf pass-through from _mm256_setzero_ps().
   NOTE(review): the index argument (presumably __X), the mask argument
   (presumably __U) and the braces are not visible in this listing. */
8816 static __inline__ __m256 __DEFAULT_FN_ATTRS
8817 _mm256_maskz_permutexvar_ps (__mmask8 __U
, __m256i __X
, __m256 __Y
)
8819 return (__m256
) __builtin_ia32_permvarsf256_mask ((__v8sf
) __Y
,
8821 (__v8sf
) _mm256_setzero_ps (),
/* Calls __builtin_ia32_permvarsf256_mask with __Y (as __v8sf) as the first
   operand and an undefined pass-through.
   NOTE(review): the pass-through is the integer _mm256_undefined_si256() cast
   to __v8sf; _mm256_undefined_ps() would match the element type — confirm.
   NOTE(review): the index argument (presumably __X), the mask argument and
   the braces are not visible in this listing. */
8825 static __inline__ __m256 __DEFAULT_FN_ATTRS
8826 _mm256_permutexvar_ps (__m256i __X
, __m256 __Y
)
8828 return (__m256
) __builtin_ia32_permvarsf256_mask ((__v8sf
) __Y
,
8830 (__v8sf
) _mm256_undefined_si256 (),
/* Calls __builtin_ia32_permvarsi256_mask with __Y (as __v8si) as the first
   operand and a zeroed __v8si pass-through from _mm256_setzero_si256().
   NOTE(review): the index argument (presumably __X), the mask argument
   (presumably __M) and the braces are not visible in this listing. */
8834 static __inline__ __m256i __DEFAULT_FN_ATTRS
8835 _mm256_maskz_permutexvar_epi32 (__mmask8 __M
, __m256i __X
, __m256i __Y
)
8837 return (__m256i
) __builtin_ia32_permvarsi256_mask ((__v8si
) __Y
,
8839 (__v8si
) _mm256_setzero_si256 (),
/* Calls __builtin_ia32_permvarsi256_mask with __Y (as __v8si) as the first
   operand.
   NOTE(review): the declaration of parameter __Y, the remaining call
   arguments and the braces are not visible in this listing. */
8843 static __inline__ __m256i __DEFAULT_FN_ATTRS
8844 _mm256_mask_permutexvar_epi32 (__m256i __W
, __mmask8 __M
, __m256i __X
,
8847 return (__m256i
) __builtin_ia32_permvarsi256_mask ((__v8si
) __Y
,
/* Calls __builtin_ia32_permvarsi256_mask with __Y (as __v8si) as the first
   operand and an _mm256_undefined_si256() pass-through.
   NOTE(review): the index argument (presumably __X), the mask argument and
   the braces are not visible in this listing. */
8853 static __inline__ __m256i __DEFAULT_FN_ATTRS
8854 _mm256_permutexvar_epi32 (__m256i __X
, __m256i __Y
)
8856 return (__m256i
) __builtin_ia32_permvarsi256_mask ((__v8si
) __Y
,
8858 (__v8si
) _mm256_undefined_si256(),
/* Expands to __builtin_ia32_alignd128_mask(A as __v4si, B as __v4si, imm as
   int, _mm_undefined_si128() pass-through, ...).
   NOTE(review): the mask argument line is not visible in this listing. */
8862 #define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
8863 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
8864 (__v4si)(__m128i)(B), (int)(imm), \
8865 (__v4si)_mm_undefined_si128(), \
/* Expands to __builtin_ia32_alignd128_mask(A as __v4si, B as __v4si, imm as
   int, W as __v4si pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8868 #define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
8869 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
8870 (__v4si)(__m128i)(B), (int)(imm), \
8871 (__v4si)(__m128i)(W), \
/* Expands to __builtin_ia32_alignd128_mask(A as __v4si, B as __v4si, imm as
   int, zeroed __v4si pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8874 #define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
8875 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
8876 (__v4si)(__m128i)(B), (int)(imm), \
8877 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_alignd256_mask(A as __v8si, B as __v8si, imm as
   int, _mm256_undefined_si256() pass-through, ...).
   NOTE(review): the mask argument line is not visible in this listing. */
8880 #define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
8881 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
8882 (__v8si)(__m256i)(B), (int)(imm), \
8883 (__v8si)_mm256_undefined_si256(), \
/* Expands to __builtin_ia32_alignd256_mask(A as __v8si, B as __v8si, imm as
   int, W as __v8si pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8886 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
8887 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
8888 (__v8si)(__m256i)(B), (int)(imm), \
8889 (__v8si)(__m256i)(W), \
/* Expands to __builtin_ia32_alignd256_mask(A as __v8si, B as __v8si, imm as
   int, zeroed __v8si pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8892 #define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
8893 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
8894 (__v8si)(__m256i)(B), (int)(imm), \
8895 (__v8si)_mm256_setzero_si256(), \
/* Expands to __builtin_ia32_alignq128_mask(A as __v2di, B as __v2di, imm as
   int, zero pass-through from _mm_setzero_di(), ...).
   NOTE(review): _mm_alignr_epi32 in this file passes an undefined vector in
   this position instead of zero — confirm which is intended.
   NOTE(review): the mask argument line is not visible in this listing. */
8898 #define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
8899 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
8900 (__v2di)(__m128i)(B), (int)(imm), \
8901 (__v2di)_mm_setzero_di(), \
/* Expands to __builtin_ia32_alignq128_mask(A as __v2di, B as __v2di, imm as
   int, W as __v2di pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8904 #define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
8905 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
8906 (__v2di)(__m128i)(B), (int)(imm), \
8907 (__v2di)(__m128i)(W), \
/* Expands to __builtin_ia32_alignq128_mask(A as __v2di, B as __v2di, imm as
   int, zero pass-through from _mm_setzero_di(), ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8910 #define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
8911 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
8912 (__v2di)(__m128i)(B), (int)(imm), \
8913 (__v2di)_mm_setzero_di(), \
/* Expands to __builtin_ia32_alignq256_mask(A as __v4di, B as __v4di, imm as
   int, undefined pass-through, ...).
   NOTE(review): the pass-through is the floating-point _mm256_undefined_pd()
   cast to __v4di; _mm256_undefined_si256() would match the element type —
   confirm.
   NOTE(review): the mask argument line is not visible in this listing. */
8916 #define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
8917 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
8918 (__v4di)(__m256i)(B), (int)(imm), \
8919 (__v4di)_mm256_undefined_pd(), \
/* Expands to __builtin_ia32_alignq256_mask(A as __v4di, B as __v4di, imm as
   int, W as __v4di pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8922 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
8923 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
8924 (__v4di)(__m256i)(B), (int)(imm), \
8925 (__v4di)(__m256i)(W), \
/* Expands to __builtin_ia32_alignq256_mask(A as __v4di, B as __v4di, imm as
   int, zeroed __v4di pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
8928 #define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
8929 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
8930 (__v4di)(__m256i)(B), (int)(imm), \
8931 (__v4di)_mm256_setzero_si256(), \
/* Passes mask __U and the result of _mm_movehdup_ps(__A) (as __v4sf) to
   __builtin_ia32_selectps_128.
   NOTE(review): the third select operand (presumably __W) and the braces
   are not visible in this listing. */
8934 static __inline__ __m128 __DEFAULT_FN_ATTRS
8935 _mm_mask_movehdup_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
8937 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8938 (__v4sf
)_mm_movehdup_ps(__A
),
/* Passes mask __U, the result of _mm_movehdup_ps(__A) and a zero vector
   from _mm_setzero_ps() to __builtin_ia32_selectps_128, in that order.
   NOTE(review): the function braces are not visible in this listing. */
8942 static __inline__ __m128 __DEFAULT_FN_ATTRS
8943 _mm_maskz_movehdup_ps (__mmask8 __U
, __m128 __A
)
8945 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8946 (__v4sf
)_mm_movehdup_ps(__A
),
8947 (__v4sf
)_mm_setzero_ps());
/* Passes mask __U and the result of _mm256_movehdup_ps(__A) (as __v8sf) to
   __builtin_ia32_selectps_256.
   NOTE(review): the third select operand (presumably __W) and the braces
   are not visible in this listing. */
8950 static __inline__ __m256 __DEFAULT_FN_ATTRS
8951 _mm256_mask_movehdup_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
8953 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8954 (__v8sf
)_mm256_movehdup_ps(__A
),
/* Passes mask __U, the result of _mm256_movehdup_ps(__A) and a zero vector
   from _mm256_setzero_ps() to __builtin_ia32_selectps_256, in that order.
   NOTE(review): the function braces are not visible in this listing. */
8958 static __inline__ __m256 __DEFAULT_FN_ATTRS
8959 _mm256_maskz_movehdup_ps (__mmask8 __U
, __m256 __A
)
8961 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8962 (__v8sf
)_mm256_movehdup_ps(__A
),
8963 (__v8sf
)_mm256_setzero_ps());
/* Passes mask __U and the result of _mm_moveldup_ps(__A) (as __v4sf) to
   __builtin_ia32_selectps_128.
   NOTE(review): the third select operand (presumably __W) and the braces
   are not visible in this listing. */
8966 static __inline__ __m128 __DEFAULT_FN_ATTRS
8967 _mm_mask_moveldup_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
8969 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8970 (__v4sf
)_mm_moveldup_ps(__A
),
/* Passes mask __U, the result of _mm_moveldup_ps(__A) and a zero vector
   from _mm_setzero_ps() to __builtin_ia32_selectps_128, in that order.
   NOTE(review): the function braces are not visible in this listing. */
8974 static __inline__ __m128 __DEFAULT_FN_ATTRS
8975 _mm_maskz_moveldup_ps (__mmask8 __U
, __m128 __A
)
8977 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8978 (__v4sf
)_mm_moveldup_ps(__A
),
8979 (__v4sf
)_mm_setzero_ps());
/* Passes mask __U and the result of _mm256_moveldup_ps(__A) (as __v8sf) to
   __builtin_ia32_selectps_256.
   NOTE(review): the third select operand (presumably __W) and the braces
   are not visible in this listing. */
8982 static __inline__ __m256 __DEFAULT_FN_ATTRS
8983 _mm256_mask_moveldup_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
8985 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8986 (__v8sf
)_mm256_moveldup_ps(__A
),
/* Passes mask __U, the result of _mm256_moveldup_ps(__A) and a zero vector
   from _mm256_setzero_ps() to __builtin_ia32_selectps_256, in that order.
   NOTE(review): the function braces are not visible in this listing. */
8990 static __inline__ __m256 __DEFAULT_FN_ATTRS
8991 _mm256_maskz_moveldup_ps (__mmask8 __U
, __m256 __A
)
8993 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8994 (__v8sf
)_mm256_moveldup_ps(__A
),
8995 (__v8sf
)_mm256_setzero_ps());
/* Passes mask U, the result of _mm256_shuffle_epi32(A, I) and W (as
   __v8si) to __builtin_ia32_selectd_256, in that order. */
8998 #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
8999 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
9000 (__v8si)_mm256_shuffle_epi32((A), (I)), \
9001 (__v8si)(__m256i)(W)); })
/* Passes mask U, the result of _mm256_shuffle_epi32(A, I) and a zero vector
   from _mm256_setzero_si256() to __builtin_ia32_selectd_256, in that order. */
9003 #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
9004 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
9005 (__v8si)_mm256_shuffle_epi32((A), (I)), \
9006 (__v8si)_mm256_setzero_si256()); })
/* Passes mask U, the result of _mm_shuffle_epi32(A, I) and W (as __v4si)
   to __builtin_ia32_selectd_128, in that order. */
9008 #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
9009 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
9010 (__v4si)_mm_shuffle_epi32((A), (I)), \
9011 (__v4si)(__m128i)(W)); })
/* Passes mask U, the result of _mm_shuffle_epi32(A, I) and a zero vector
   from _mm_setzero_si128() to __builtin_ia32_selectd_128, in that order. */
9013 #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
9014 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
9015 (__v4si)_mm_shuffle_epi32((A), (I)), \
9016 (__v4si)_mm_setzero_si128()); })
/* Calls __builtin_ia32_selectpd_128 with __U as the mask operand.
   NOTE(review): the two vector operands (presumably __A and __W) and the
   braces are not visible in this listing. */
9018 static __inline__ __m128d __DEFAULT_FN_ATTRS
9019 _mm_mask_mov_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
9021 return (__m128d
) __builtin_ia32_selectpd_128 ((__mmask8
) __U
,
/* Calls __builtin_ia32_selectpd_128 with __U as the mask operand and a zero
   vector from _mm_setzero_pd() as the last operand.
   NOTE(review): the middle operand (presumably __A) and the braces are not
   visible in this listing. */
9026 static __inline__ __m128d __DEFAULT_FN_ATTRS
9027 _mm_maskz_mov_pd (__mmask8 __U
, __m128d __A
)
9029 return (__m128d
) __builtin_ia32_selectpd_128 ((__mmask8
) __U
,
9031 (__v2df
) _mm_setzero_pd ());
/* Calls __builtin_ia32_selectpd_256 with __U as the mask operand.
   NOTE(review): the two vector operands (presumably __A and __W) and the
   braces are not visible in this listing. */
9034 static __inline__ __m256d __DEFAULT_FN_ATTRS
9035 _mm256_mask_mov_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
9037 return (__m256d
) __builtin_ia32_selectpd_256 ((__mmask8
) __U
,
/* Calls __builtin_ia32_selectpd_256 with __U as the mask operand and a zero
   vector from _mm256_setzero_pd() as the last operand.
   NOTE(review): the middle operand (presumably __A) and the braces are not
   visible in this listing. */
9042 static __inline__ __m256d __DEFAULT_FN_ATTRS
9043 _mm256_maskz_mov_pd (__mmask8 __U
, __m256d __A
)
9045 return (__m256d
) __builtin_ia32_selectpd_256 ((__mmask8
) __U
,
9047 (__v4df
) _mm256_setzero_pd ());
/* Calls __builtin_ia32_selectps_128 with __U as the mask operand.
   NOTE(review): the two vector operands (presumably __A and __W) and the
   braces are not visible in this listing. */
9050 static __inline__ __m128 __DEFAULT_FN_ATTRS
9051 _mm_mask_mov_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
9053 return (__m128
) __builtin_ia32_selectps_128 ((__mmask8
) __U
,
/* Calls __builtin_ia32_selectps_128 with __U as the mask operand and a zero
   vector from _mm_setzero_ps() as the last operand.
   NOTE(review): the middle operand (presumably __A) and the braces are not
   visible in this listing. */
9058 static __inline__ __m128 __DEFAULT_FN_ATTRS
9059 _mm_maskz_mov_ps (__mmask8 __U
, __m128 __A
)
9061 return (__m128
) __builtin_ia32_selectps_128 ((__mmask8
) __U
,
9063 (__v4sf
) _mm_setzero_ps ());
/* Calls __builtin_ia32_selectps_256 with __U as the mask operand.
   NOTE(review): the two vector operands (presumably __A and __W) and the
   braces are not visible in this listing. */
9066 static __inline__ __m256 __DEFAULT_FN_ATTRS
9067 _mm256_mask_mov_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
9069 return (__m256
) __builtin_ia32_selectps_256 ((__mmask8
) __U
,
/* Calls __builtin_ia32_selectps_256 with __U as the mask operand and a zero
   vector from _mm256_setzero_ps() as the last operand.
   NOTE(review): the middle operand (presumably __A) and the braces are not
   visible in this listing. */
9074 static __inline__ __m256 __DEFAULT_FN_ATTRS
9075 _mm256_maskz_mov_ps (__mmask8 __U
, __m256 __A
)
9077 return (__m256
) __builtin_ia32_selectps_256 ((__mmask8
) __U
,
9079 (__v8sf
) _mm256_setzero_ps ());
/* Calls __builtin_ia32_vcvtph2ps_mask on __A (as __v8hi).
   NOTE(review): the pass-through and mask arguments (presumably __W and __U)
   and the braces are not visible in this listing. */
9082 static __inline__ __m128 __DEFAULT_FN_ATTRS
9083 _mm_mask_cvtph_ps (__m128 __W
, __mmask8 __U
, __m128i __A
)
9085 return (__m128
) __builtin_ia32_vcvtph2ps_mask ((__v8hi
) __A
,
/* Calls __builtin_ia32_vcvtph2ps_mask on __A (as __v8hi).
   NOTE(review): the pass-through and mask arguments and the braces are not
   visible in this listing. */
9090 static __inline__ __m128 __DEFAULT_FN_ATTRS
9091 _mm_maskz_cvtph_ps (__mmask8 __U
, __m128i __A
)
9093 return (__m128
) __builtin_ia32_vcvtph2ps_mask ((__v8hi
) __A
,
/* Calls __builtin_ia32_vcvtph2ps256_mask on the 128-bit input __A (as
   __v8hi); the result is cast to __m256.
   NOTE(review): the pass-through and mask arguments (presumably __W and __U)
   and the braces are not visible in this listing. */
9099 static __inline__ __m256 __DEFAULT_FN_ATTRS
9100 _mm256_mask_cvtph_ps (__m256 __W
, __mmask8 __U
, __m128i __A
)
9102 return (__m256
) __builtin_ia32_vcvtph2ps256_mask ((__v8hi
) __A
,
/* Calls __builtin_ia32_vcvtph2ps256_mask on the 128-bit input __A (as
   __v8hi) with a zero pass-through from _mm256_setzero_ps().
   NOTE(review): the cast on the pass-through, the mask argument (presumably
   __U) and the braces are not visible in this listing. */
9107 static __inline__ __m256 __DEFAULT_FN_ATTRS
9108 _mm256_maskz_cvtph_ps (__mmask8 __U
, __m128i __A
)
9110 return (__m256
) __builtin_ia32_vcvtph2ps256_mask ((__v8hi
) __A
,
9112 _mm256_setzero_ps (),
/* Calls __builtin_ia32_vcvtps2ph_mask on __A (as __v4sf) with
   _MM_FROUND_CUR_DIRECTION as the second argument.
   NOTE(review): the pass-through and mask arguments (presumably __W and __U)
   and the braces are not visible in this listing. */
9116 static __inline __m128i __DEFAULT_FN_ATTRS
9117 _mm_mask_cvtps_ph (__m128i __W
, __mmask8 __U
, __m128 __A
)
9119 return (__m128i
) __builtin_ia32_vcvtps2ph_mask ((__v4sf
) __A
, _MM_FROUND_CUR_DIRECTION
,
/* Calls __builtin_ia32_vcvtps2ph_mask on __A (as __v4sf) with
   _MM_FROUND_CUR_DIRECTION as the second argument and a zeroed __v8hi
   pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __U) and the braces are not
   visible in this listing. */
9124 static __inline __m128i __DEFAULT_FN_ATTRS
9125 _mm_maskz_cvtps_ph (__mmask8 __U
, __m128 __A
)
9127 return (__m128i
) __builtin_ia32_vcvtps2ph_mask ((__v4sf
) __A
, _MM_FROUND_CUR_DIRECTION
,
9128 (__v8hi
) _mm_setzero_si128 (),
/* Expands to __builtin_ia32_vcvtps2ph_mask(A as __v4sf, I as int, W as
   __v8hi pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
9132 #define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
9133 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
9134 (__v8hi)(__m128i)(W), \
/* Expands to __builtin_ia32_vcvtps2ph_mask(A as __v4sf, I as int, zeroed
   __v8hi pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
9137 #define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
9138 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
9139 (__v8hi)_mm_setzero_si128(), \
/* Calls __builtin_ia32_vcvtps2ph256_mask on __A (as __v8sf) with
   _MM_FROUND_CUR_DIRECTION as the second argument; the result is 128-bit.
   NOTE(review): the pass-through and mask arguments (presumably __W and __U)
   and the braces are not visible in this listing. */
9142 static __inline __m128i __DEFAULT_FN_ATTRS
9143 _mm256_mask_cvtps_ph (__m128i __W
, __mmask8 __U
, __m256 __A
)
9145 return (__m128i
) __builtin_ia32_vcvtps2ph256_mask ((__v8sf
) __A
, _MM_FROUND_CUR_DIRECTION
,
/* Calls __builtin_ia32_vcvtps2ph256_mask on __A (as __v8sf) with
   _MM_FROUND_CUR_DIRECTION as the second argument and a zeroed __v8hi
   pass-through from _mm_setzero_si128().
   NOTE(review): the mask argument (presumably __U) and the braces are not
   visible in this listing. */
9150 static __inline __m128i __DEFAULT_FN_ATTRS
9151 _mm256_maskz_cvtps_ph ( __mmask8 __U
, __m256 __A
)
9153 return (__m128i
) __builtin_ia32_vcvtps2ph256_mask ((__v8sf
) __A
, _MM_FROUND_CUR_DIRECTION
,
9154 (__v8hi
) _mm_setzero_si128(),
/* Expands to __builtin_ia32_vcvtps2ph256_mask(A as __v8sf, I as int, W as
   __v8hi pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
9157 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
9158 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
9159 (__v8hi)(__m128i)(W), \
/* Expands to __builtin_ia32_vcvtps2ph256_mask(A as __v8sf, I as int, zeroed
   __v8hi pass-through, ...).
   NOTE(review): the mask (presumably U) line is not visible in this listing. */
9162 #define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
9163 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
9164 (__v8hi)_mm_setzero_si128(), \
9168 #undef __DEFAULT_FN_ATTRS
9170 #endif /* __AVX512VLINTRIN_H */