[AVX-512] Enable QI-mode mask logic patterns on non-AVX-512DQ targets.
[official-gcc.git] / gcc / config / i386 / avx512vlintrin.h
blob b995cecc8c0442ce92f33e79f2a9c7a27fdb3ae0
1 /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26 #endif
28 #ifndef _AVX512VLINTRIN_H_INCLUDED
29 #define _AVX512VLINTRIN_H_INCLUDED
31 /* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
32 extern __inline __m128i
33 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
34 _mm_setzero_di (void)
36 return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
39 #ifndef __AVX512VL__
40 #pragma GCC push_options
41 #pragma GCC target("avx512vl")
42 #define __DISABLE_AVX512VL__
43 #endif /* __AVX512VL__ */
45 /* Internal data types for implementing the intrinsics. */
46 typedef unsigned int __mmask32;
48 extern __inline __m256d
49 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
52 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
53 (__v4df) __W,
54 (__mmask8) __U);
57 extern __inline __m256d
58 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
61 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
62 (__v4df)
63 _mm256_setzero_pd (),
64 (__mmask8) __U);
67 extern __inline __m128d
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
71 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
72 (__v2df) __W,
73 (__mmask8) __U);
76 extern __inline __m128d
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
80 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
81 (__v2df)
82 _mm_setzero_pd (),
83 (__mmask8) __U);
86 extern __inline __m256d
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
90 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
91 (__v4df) __W,
92 (__mmask8) __U);
95 extern __inline __m256d
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
99 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
100 (__v4df)
101 _mm256_setzero_pd (),
102 (__mmask8) __U);
105 extern __inline __m128d
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
109 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
110 (__v2df) __W,
111 (__mmask8) __U);
114 extern __inline __m128d
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
118 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
119 (__v2df)
120 _mm_setzero_pd (),
121 (__mmask8) __U);
124 extern __inline void
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
128 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
129 (__v4df) __A,
130 (__mmask8) __U);
133 extern __inline void
134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
137 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
138 (__v2df) __A,
139 (__mmask8) __U);
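/* Editorial usage sketch (not part of this header): merge- and zero-masking
   of double vectors with the intrinsics above.  Assumes <immintrin.h> is
   included and the translation unit is built with -mavx512vl; the helper
   names are hypothetical.

     static inline __m256d
     blend_pd_example (__m256d dst, __m256d src, __mmask8 m)
     {
       // Lanes whose mask bit is set take src, the rest keep dst.
       return _mm256_mask_mov_pd (dst, m, src);
     }

     static inline void
     copy_selected_pd_example (double *dst, const double *src, __mmask8 m)
     {
       // The aligned load/store forms expect 32-byte aligned pointers.
       __m256d v = _mm256_maskz_load_pd (m, src);  // unselected lanes become 0.0
       _mm256_mask_store_pd (dst, m, v);           // unselected lanes of dst untouched
     }
*/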
142 extern __inline __m256
143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
146 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
147 (__v8sf) __W,
148 (__mmask8) __U);
151 extern __inline __m256
152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
155 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
156 (__v8sf)
157 _mm256_setzero_ps (),
158 (__mmask8) __U);
161 extern __inline __m128
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
165 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
166 (__v4sf) __W,
167 (__mmask8) __U);
170 extern __inline __m128
171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
172 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
174 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
175 (__v4sf)
176 _mm_setzero_ps (),
177 (__mmask8) __U);
180 extern __inline __m256
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
184 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
185 (__v8sf) __W,
186 (__mmask8) __U);
189 extern __inline __m256
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
193 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
194 (__v8sf)
195 _mm256_setzero_ps (),
196 (__mmask8) __U);
199 extern __inline __m128
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
203 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
204 (__v4sf) __W,
205 (__mmask8) __U);
208 extern __inline __m128
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
212 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
213 (__v4sf)
214 _mm_setzero_ps (),
215 (__mmask8) __U);
218 extern __inline void
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
222 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
223 (__v8sf) __A,
224 (__mmask8) __U);
227 extern __inline void
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
231 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
232 (__v4sf) __A,
233 (__mmask8) __U);
236 extern __inline __m256i
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
240 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
241 (__v4di) __W,
242 (__mmask8) __U);
245 extern __inline __m256i
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
249 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
250 (__v4di)
251 _mm256_setzero_si256 (),
252 (__mmask8) __U);
255 extern __inline __m128i
256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
259 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
260 (__v2di) __W,
261 (__mmask8) __U);
264 extern __inline __m128i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
268 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
269 (__v2di)
270 _mm_setzero_di (),
271 (__mmask8) __U);
274 extern __inline __m256i
275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
278 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
279 (__v4di) __W,
280 (__mmask8)
281 __U);
284 extern __inline __m256i
285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
286 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
288 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
289 (__v4di)
290 _mm256_setzero_si256 (),
291 (__mmask8)
292 __U);
295 extern __inline __m128i
296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
297 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
299 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
300 (__v2di) __W,
301 (__mmask8)
302 __U);
305 extern __inline __m128i
306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
307 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
309 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
310 (__v2di)
311 _mm_setzero_di (),
312 (__mmask8)
313 __U);
316 extern __inline void
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
320 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
321 (__v4di) __A,
322 (__mmask8) __U);
325 extern __inline void
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
329 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
330 (__v2di) __A,
331 (__mmask8) __U);
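/* Editorial usage sketch (not part of this header): the VMOVDQA64-based
   forms above give per-qword masking.  Hypothetical helpers, assuming
   <immintrin.h> and -mavx512vl.

     static inline __m256i
     select_epi64_example (__m256i old, __m256i new_vals, __mmask8 m)
     {
       // Bits 0..3 of m pick which 64-bit lanes come from new_vals.
       return _mm256_mask_mov_epi64 (old, m, new_vals);
     }

     static inline __m128i
     load_low_qword_example (const void *p)
     {
       // Mask 0x1 loads only element 0 and zeroes element 1; p must be
       // 16-byte aligned for this aligned form.
       return _mm_maskz_load_epi64 ((__mmask8) 0x1, p);
     }
*/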
334 extern __inline __m256i
335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
336 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
338 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
339 (__v8si) __W,
340 (__mmask8) __U);
343 extern __inline __m256i
344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
347 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
348 (__v8si)
349 _mm256_setzero_si256 (),
350 (__mmask8) __U);
353 extern __inline __m128i
354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
357 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
358 (__v4si) __W,
359 (__mmask8) __U);
362 extern __inline __m128i
363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
364 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
366 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
367 (__v4si)
368 _mm_setzero_si128 (),
369 (__mmask8) __U);
372 extern __inline __m256i
373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
376 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
377 (__v8si) __W,
378 (__mmask8)
379 __U);
382 extern __inline __m256i
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
386 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
387 (__v8si)
388 _mm256_setzero_si256 (),
389 (__mmask8)
390 __U);
393 extern __inline __m128i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
397 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
398 (__v4si) __W,
399 (__mmask8)
400 __U);
403 extern __inline __m128i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
407 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
408 (__v4si)
409 _mm_setzero_si128 (),
410 (__mmask8)
411 __U);
414 extern __inline void
415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
418 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
419 (__v8si) __A,
420 (__mmask8) __U);
423 extern __inline void
424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
427 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
428 (__v4si) __A,
429 (__mmask8) __U);
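/* Editorial usage sketch (not part of this header): the same masking pattern
   applies to 32-bit lanes via the VMOVDQA32 forms above.  Hypothetical
   helper, assuming <immintrin.h> and -mavx512vl.

     static inline void
     update_even_lanes_example (int *dst, __m256i v)
     {
       // 0x55 selects lanes 0,2,4,6; dst must be 32-byte aligned for the
       // aligned store, and the odd ints in dst are left untouched.
       _mm256_mask_store_epi32 (dst, (__mmask8) 0x55, v);
     }
*/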
432 extern __inline __m128i
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434 _mm_setzero_hi (void)
436 return __extension__ (__m128i) (__v8hi)
438 0, 0, 0, 0, 0, 0, 0, 0};
441 extern __inline __m128d
442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
443 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
445 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
446 (__v2df) __B,
447 (__v2df) __W,
448 (__mmask8) __U);
451 extern __inline __m128d
452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
455 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
456 (__v2df) __B,
457 (__v2df)
458 _mm_setzero_pd (),
459 (__mmask8) __U);
462 extern __inline __m256d
463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
464 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
465 __m256d __B)
467 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
468 (__v4df) __B,
469 (__v4df) __W,
470 (__mmask8) __U);
473 extern __inline __m256d
474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
477 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
478 (__v4df) __B,
479 (__v4df)
480 _mm256_setzero_pd (),
481 (__mmask8) __U);
484 extern __inline __m128
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
488 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
489 (__v4sf) __B,
490 (__v4sf) __W,
491 (__mmask8) __U);
494 extern __inline __m128
495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496 _mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
498 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
499 (__v4sf) __B,
500 (__v4sf)
501 _mm_setzero_ps (),
502 (__mmask8) __U);
505 extern __inline __m256
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507 _mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
509 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
510 (__v8sf) __B,
511 (__v8sf) __W,
512 (__mmask8) __U);
515 extern __inline __m256
516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
517 _mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
519 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
520 (__v8sf) __B,
521 (__v8sf)
522 _mm256_setzero_ps (),
523 (__mmask8) __U);
526 extern __inline __m128d
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
530 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
531 (__v2df) __B,
532 (__v2df) __W,
533 (__mmask8) __U);
536 extern __inline __m128d
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
540 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
541 (__v2df) __B,
542 (__v2df)
543 _mm_setzero_pd (),
544 (__mmask8) __U);
547 extern __inline __m256d
548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
550 __m256d __B)
552 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
553 (__v4df) __B,
554 (__v4df) __W,
555 (__mmask8) __U);
558 extern __inline __m256d
559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
562 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
563 (__v4df) __B,
564 (__v4df)
565 _mm256_setzero_pd (),
566 (__mmask8) __U);
569 extern __inline __m128
570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571 _mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
573 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
574 (__v4sf) __B,
575 (__v4sf) __W,
576 (__mmask8) __U);
579 extern __inline __m128
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
583 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
584 (__v4sf) __B,
585 (__v4sf)
586 _mm_setzero_ps (),
587 (__mmask8) __U);
590 extern __inline __m256
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
594 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
595 (__v8sf) __B,
596 (__v8sf) __W,
597 (__mmask8) __U);
600 extern __inline __m256
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
604 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
605 (__v8sf) __B,
606 (__v8sf)
607 _mm256_setzero_ps (),
608 (__mmask8) __U);
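/* Editorial usage sketch (not part of this header): masked arithmetic keeps
   (mask variants) or zeroes (maskz variants) the lanes whose mask bit is
   clear.  Hypothetical helpers, assuming <immintrin.h> and -mavx512vl.

     static inline __m256d
     conditional_accumulate_example (__m256d acc, __m256d a, __m256d b, __mmask8 m)
     {
       // Selected lanes become a + b, the others keep the value from acc.
       return _mm256_mask_add_pd (acc, m, a, b);
     }

     static inline __m128
     masked_diff_example (__mmask8 m, __m128 a, __m128 b)
     {
       // Unselected lanes of the result are 0.0f.
       return _mm_maskz_sub_ps (m, a, b);
     }
*/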
611 extern __inline void
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm256_store_epi64 (void *__P, __m256i __A)
615 *(__m256i *) __P = __A;
618 extern __inline void
619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
620 _mm_store_epi64 (void *__P, __m128i __A)
622 *(__m128i *) __P = __A;
625 extern __inline __m256d
626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
629 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
630 (__v4df) __W,
631 (__mmask8) __U);
634 extern __inline __m256d
635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
638 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
639 (__v4df)
640 _mm256_setzero_pd (),
641 (__mmask8) __U);
644 extern __inline __m128d
645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
648 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
649 (__v2df) __W,
650 (__mmask8) __U);
653 extern __inline __m128d
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
657 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
658 (__v2df)
659 _mm_setzero_pd (),
660 (__mmask8) __U);
663 extern __inline void
664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
665 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
667 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
668 (__v4df) __A,
669 (__mmask8) __U);
672 extern __inline void
673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
674 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
676 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
677 (__v2df) __A,
678 (__mmask8) __U);
681 extern __inline __m256
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
685 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
686 (__v8sf) __W,
687 (__mmask8) __U);
690 extern __inline __m256
691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
692 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
694 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
695 (__v8sf)
696 _mm256_setzero_ps (),
697 (__mmask8) __U);
700 extern __inline __m128
701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
702 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
704 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
705 (__v4sf) __W,
706 (__mmask8) __U);
709 extern __inline __m128
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
713 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
714 (__v4sf)
715 _mm_setzero_ps (),
716 (__mmask8) __U);
719 extern __inline void
720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
723 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
724 (__v8sf) __A,
725 (__mmask8) __U);
728 extern __inline void
729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
730 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
732 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
733 (__v4sf) __A,
734 (__mmask8) __U);
737 extern __inline __m256i
738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
741 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
742 (__v4di) __W,
743 (__mmask8) __U);
746 extern __inline __m256i
747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
748 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
750 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
751 (__v4di)
752 _mm256_setzero_si256 (),
753 (__mmask8) __U);
756 extern __inline __m128i
757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
758 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
760 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
761 (__v2di) __W,
762 (__mmask8) __U);
765 extern __inline __m128i
766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
769 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
770 (__v2di)
771 _mm_setzero_di (),
772 (__mmask8) __U);
775 extern __inline void
776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
779 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
780 (__v4di) __A,
781 (__mmask8) __U);
784 extern __inline void
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
788 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
789 (__v2di) __A,
790 (__mmask8) __U);
793 extern __inline __m256i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
797 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
798 (__v8si) __W,
799 (__mmask8) __U);
802 extern __inline __m256i
803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
806 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
807 (__v8si)
808 _mm256_setzero_si256 (),
809 (__mmask8) __U);
812 extern __inline __m128i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
816 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
817 (__v4si) __W,
818 (__mmask8) __U);
821 extern __inline __m128i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
825 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
826 (__v4si)
827 _mm_setzero_si128 (),
828 (__mmask8) __U);
831 extern __inline void
832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
833 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
835 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
836 (__v8si) __A,
837 (__mmask8) __U);
840 extern __inline void
841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
842 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
844 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
845 (__v4si) __A,
846 (__mmask8) __U);
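/* Editorial usage sketch (not part of this header): the unaligned masked
   loads/stores are convenient for array tails, since masked-off elements
   are neither loaded nor written.  Hypothetical helper, assuming
   <immintrin.h> and -mavx512vl.

     static inline void
     add1_to_tail_example (float *p, unsigned n)   // n < 8
     {
       __mmask8 m = (__mmask8) ((1u << n) - 1);    // select the first n lanes
       __m256 v = _mm256_maskz_loadu_ps (m, p);    // lanes >= n read as 0.0f
       v = _mm256_add_ps (v, _mm256_set1_ps (1.0f));
       _mm256_mask_storeu_ps (p, m, v);            // only the first n floats written
     }
*/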
849 extern __inline __m256i
850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
853 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
854 (__v8si) __W,
855 (__mmask8) __U);
858 extern __inline __m256i
859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
860 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
862 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
863 (__v8si)
864 _mm256_setzero_si256 (),
865 (__mmask8) __U);
868 extern __inline __m128i
869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
870 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
872 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
873 (__v4si) __W,
874 (__mmask8) __U);
877 extern __inline __m128i
878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
879 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
881 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
882 (__v4si)
883 _mm_setzero_si128 (),
884 (__mmask8) __U);
887 extern __inline __m256i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm256_abs_epi64 (__m256i __A)
891 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
892 (__v4di)
893 _mm256_setzero_si256 (),
894 (__mmask8) -1);
897 extern __inline __m256i
898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
901 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
902 (__v4di) __W,
903 (__mmask8) __U);
906 extern __inline __m256i
907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
910 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
911 (__v4di)
912 _mm256_setzero_si256 (),
913 (__mmask8) __U);
916 extern __inline __m128i
917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918 _mm_abs_epi64 (__m128i __A)
920 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
921 (__v2di)
922 _mm_setzero_di (),
923 (__mmask8) -1);
926 extern __inline __m128i
927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
930 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
931 (__v2di) __W,
932 (__mmask8) __U);
935 extern __inline __m128i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
939 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
940 (__v2di)
941 _mm_setzero_di (),
942 (__mmask8) __U);
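/* Editorial usage sketch (not part of this header): _mm256_abs_epi64 and
   _mm_abs_epi64 expose VPABSQ, the 64-bit absolute value that plain AVX2
   does not provide.  Hypothetical helper, assuming <immintrin.h> and
   -mavx512vl.

     static inline __m256i
     abs_diff64_example (__m256i a, __m256i b)
     {
       return _mm256_abs_epi64 (_mm256_sub_epi64 (a, b));
     }
*/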
945 extern __inline __m128i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm256_cvtpd_epu32 (__m256d __A)
949 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
950 (__v4si)
951 _mm_setzero_si128 (),
952 (__mmask8) -1);
955 extern __inline __m128i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
959 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
960 (__v4si) __W,
961 (__mmask8) __U);
964 extern __inline __m128i
965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
966 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
968 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
969 (__v4si)
970 _mm_setzero_si128 (),
971 (__mmask8) __U);
974 extern __inline __m128i
975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976 _mm_cvtpd_epu32 (__m128d __A)
978 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
979 (__v4si)
980 _mm_setzero_si128 (),
981 (__mmask8) -1);
984 extern __inline __m128i
985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
986 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
988 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
989 (__v4si) __W,
990 (__mmask8) __U);
993 extern __inline __m128i
994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
997 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
998 (__v4si)
999 _mm_setzero_si128 (),
1000 (__mmask8) __U);
1003 extern __inline __m256i
1004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1007 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1008 (__v8si) __W,
1009 (__mmask8) __U);
1012 extern __inline __m256i
1013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1016 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1017 (__v8si)
1018 _mm256_setzero_si256 (),
1019 (__mmask8) __U);
1022 extern __inline __m128i
1023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1026 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1027 (__v4si) __W,
1028 (__mmask8) __U);
1031 extern __inline __m128i
1032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1035 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1036 (__v4si)
1037 _mm_setzero_si128 (),
1038 (__mmask8) __U);
1041 extern __inline __m256i
1042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1043 _mm256_cvttps_epu32 (__m256 __A)
1045 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1046 (__v8si)
1047 _mm256_setzero_si256 (),
1048 (__mmask8) -1);
1051 extern __inline __m256i
1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1055 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1056 (__v8si) __W,
1057 (__mmask8) __U);
1060 extern __inline __m256i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1064 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1065 (__v8si)
1066 _mm256_setzero_si256 (),
1067 (__mmask8) __U);
1070 extern __inline __m128i
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm_cvttps_epu32 (__m128 __A)
1074 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1075 (__v4si)
1076 _mm_setzero_si128 (),
1077 (__mmask8) -1);
1080 extern __inline __m128i
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1084 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1085 (__v4si) __W,
1086 (__mmask8) __U);
1089 extern __inline __m128i
1090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1091 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1093 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1094 (__v4si)
1095 _mm_setzero_si128 (),
1096 (__mmask8) __U);
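/* Editorial usage sketch (not part of this header): the *_cvttps_epu32
   forms truncate toward zero and convert to unsigned 32-bit integers
   (VCVTTPS2UDQ).  Hypothetical helper, assuming <immintrin.h> and
   -mavx512vl.

     static inline __m256i
     float_to_u32_example (__m256 v, __mmask8 m)
     {
       // Masked-off result lanes are zeroed.
       return _mm256_maskz_cvttps_epu32 (m, v);
     }
*/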
1099 extern __inline __m128i
1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1103 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1104 (__v4si) __W,
1105 (__mmask8) __U);
1108 extern __inline __m128i
1109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1112 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1113 (__v4si)
1114 _mm_setzero_si128 (),
1115 (__mmask8) __U);
1118 extern __inline __m128i
1119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1122 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1123 (__v4si) __W,
1124 (__mmask8) __U);
1127 extern __inline __m128i
1128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1131 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1132 (__v4si)
1133 _mm_setzero_si128 (),
1134 (__mmask8) __U);
1137 extern __inline __m128i
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm256_cvttpd_epu32 (__m256d __A)
1141 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1142 (__v4si)
1143 _mm_setzero_si128 (),
1144 (__mmask8) -1);
1147 extern __inline __m128i
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1151 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1152 (__v4si) __W,
1153 (__mmask8) __U);
1156 extern __inline __m128i
1157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1160 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1161 (__v4si)
1162 _mm_setzero_si128 (),
1163 (__mmask8) __U);
1166 extern __inline __m128i
1167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm_cvttpd_epu32 (__m128d __A)
1170 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1171 (__v4si)
1172 _mm_setzero_si128 (),
1173 (__mmask8) -1);
1176 extern __inline __m128i
1177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1180 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1181 (__v4si) __W,
1182 (__mmask8) __U);
1185 extern __inline __m128i
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1189 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1190 (__v4si)
1191 _mm_setzero_si128 (),
1192 (__mmask8) __U);
1195 extern __inline __m128i
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1199 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1200 (__v4si) __W,
1201 (__mmask8) __U);
1204 extern __inline __m128i
1205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1208 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1209 (__v4si)
1210 _mm_setzero_si128 (),
1211 (__mmask8) __U);
1214 extern __inline __m128i
1215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1218 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1219 (__v4si) __W,
1220 (__mmask8) __U);
1223 extern __inline __m128i
1224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1227 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1228 (__v4si)
1229 _mm_setzero_si128 (),
1230 (__mmask8) __U);
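/* Editorial usage sketch (not part of this header): converting four doubles
   yields four 32-bit integers packed into a __m128i.  Hypothetical helper,
   assuming <immintrin.h> and -mavx512vl.

     static inline __m128i
     pd_to_i32_example (__m128i fallback, __mmask8 m, __m256d v)
     {
       // Rounds according to MXCSR (use the cvtt* forms above for truncation);
       // masked-off result lanes keep the corresponding element of fallback.
       return _mm256_mask_cvtpd_epi32 (fallback, m, v);
     }
*/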
1233 extern __inline __m256d
1234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1235 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1237 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1238 (__v4df) __W,
1239 (__mmask8) __U);
1242 extern __inline __m256d
1243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1244 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1246 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1247 (__v4df)
1248 _mm256_setzero_pd (),
1249 (__mmask8) __U);
1252 extern __inline __m128d
1253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1254 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1256 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1257 (__v2df) __W,
1258 (__mmask8) __U);
1261 extern __inline __m128d
1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1265 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1266 (__v2df)
1267 _mm_setzero_pd (),
1268 (__mmask8) __U);
1271 extern __inline __m256d
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm256_cvtepu32_pd (__m128i __A)
1275 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1276 (__v4df)
1277 _mm256_setzero_pd (),
1278 (__mmask8) -1);
1281 extern __inline __m256d
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1285 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1286 (__v4df) __W,
1287 (__mmask8) __U);
1290 extern __inline __m256d
1291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1294 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1295 (__v4df)
1296 _mm256_setzero_pd (),
1297 (__mmask8) __U);
1300 extern __inline __m128d
1301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm_cvtepu32_pd (__m128i __A)
1304 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1305 (__v2df)
1306 _mm_setzero_pd (),
1307 (__mmask8) -1);
1310 extern __inline __m128d
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1314 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1315 (__v2df) __W,
1316 (__mmask8) __U);
1319 extern __inline __m128d
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1323 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1324 (__v2df)
1325 _mm_setzero_pd (),
1326 (__mmask8) __U);
1329 extern __inline __m256
1330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1333 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1334 (__v8sf) __W,
1335 (__mmask8) __U);
1338 extern __inline __m256
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
1342 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1343 (__v8sf)
1344 _mm256_setzero_ps (),
1345 (__mmask8) __U);
1348 extern __inline __m128
1349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1352 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1353 (__v4sf) __W,
1354 (__mmask8) __U);
1357 extern __inline __m128
1358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
1361 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1362 (__v4sf)
1363 _mm_setzero_ps (),
1364 (__mmask8) __U);
1367 extern __inline __m256
1368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1369 _mm256_cvtepu32_ps (__m256i __A)
1371 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1372 (__v8sf)
1373 _mm256_setzero_ps (),
1374 (__mmask8) -1);
1377 extern __inline __m256
1378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1381 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1382 (__v8sf) __W,
1383 (__mmask8) __U);
1386 extern __inline __m256
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1390 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1391 (__v8sf)
1392 _mm256_setzero_ps (),
1393 (__mmask8) __U);
1396 extern __inline __m128
1397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1398 _mm_cvtepu32_ps (__m128i __A)
1400 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1401 (__v4sf)
1402 _mm_setzero_ps (),
1403 (__mmask8) -1);
1406 extern __inline __m128
1407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1410 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1411 (__v4sf) __W,
1412 (__mmask8) __U);
1415 extern __inline __m128
1416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1419 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1420 (__v4sf)
1421 _mm_setzero_ps (),
1422 (__mmask8) __U);
1425 extern __inline __m256d
1426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1427 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1429 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1430 (__v4df) __W,
1431 (__mmask8) __U);
1434 extern __inline __m256d
1435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1436 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1438 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1439 (__v4df)
1440 _mm256_setzero_pd (),
1441 (__mmask8) __U);
1444 extern __inline __m128d
1445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1448 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1449 (__v2df) __W,
1450 (__mmask8) __U);
1453 extern __inline __m128d
1454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1457 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1458 (__v2df)
1459 _mm_setzero_pd (),
1460 (__mmask8) __U);
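/* Editorial usage sketch (not part of this header): the cvtepu32_* forms
   convert unsigned 32-bit integers (VCVTUDQ2PD/VCVTUDQ2PS), for which AVX2
   has no single instruction.  Hypothetical helpers, assuming <immintrin.h>
   and -mavx512vl.

     static inline __m256d
     u32_to_double_example (__m128i v)
     {
       return _mm256_cvtepu32_pd (v);        // four uint32 -> four double
     }

     static inline __m256d
     widen_selected_example (__mmask8 m, __m128 v)
     {
       return _mm256_maskz_cvtps_pd (m, v);  // widen 4 floats, zero unselected lanes
     }
*/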
1463 extern __inline __m128i
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm_cvtepi32_epi8 (__m128i __A)
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi)_mm_undefined_si128(),
1469 (__mmask8) -1);
1472 extern __inline void
1473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1474 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1476 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1479 extern __inline __m128i
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1483 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1484 (__v16qi) __O, __M);
1487 extern __inline __m128i
1488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1491 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1492 (__v16qi)
1493 _mm_setzero_si128 (),
1494 __M);
1497 extern __inline __m128i
1498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499 _mm256_cvtepi32_epi8 (__m256i __A)
1501 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1502 (__v16qi)_mm_undefined_si128(),
1503 (__mmask8) -1);
1506 extern __inline __m128i
1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi) __O, __M);
1514 extern __inline void
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1518 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1521 extern __inline __m128i
1522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1525 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1526 (__v16qi)
1527 _mm_setzero_si128 (),
1528 __M);
1531 extern __inline __m128i
1532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1533 _mm_cvtsepi32_epi8 (__m128i __A)
1535 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1536 (__v16qi)_mm_undefined_si128(),
1537 (__mmask8) -1);
1540 extern __inline void
1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1544 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1547 extern __inline __m128i
1548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1551 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1552 (__v16qi) __O, __M);
1555 extern __inline __m128i
1556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1559 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1560 (__v16qi)
1561 _mm_setzero_si128 (),
1562 __M);
1565 extern __inline __m128i
1566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567 _mm256_cvtsepi32_epi8 (__m256i __A)
1569 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1570 (__v16qi)_mm_undefined_si128(),
1571 (__mmask8) -1);
1574 extern __inline void
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1578 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1581 extern __inline __m128i
1582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1585 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1586 (__v16qi) __O, __M);
1589 extern __inline __m128i
1590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1593 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1594 (__v16qi)
1595 _mm_setzero_si128 (),
1596 __M);
1599 extern __inline __m128i
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm_cvtusepi32_epi8 (__m128i __A)
1603 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1604 (__v16qi)_mm_undefined_si128(),
1605 (__mmask8) -1);
1608 extern __inline void
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1612 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1615 extern __inline __m128i
1616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1619 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1620 (__v16qi) __O,
1621 __M);
1624 extern __inline __m128i
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1628 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1629 (__v16qi)
1630 _mm_setzero_si128 (),
1631 __M);
1634 extern __inline __m128i
1635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636 _mm256_cvtusepi32_epi8 (__m256i __A)
1638 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1639 (__v16qi)_mm_undefined_si128(),
1640 (__mmask8) -1);
1643 extern __inline void
1644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1647 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1650 extern __inline __m128i
1651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1654 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1655 (__v16qi) __O,
1656 __M);
1659 extern __inline __m128i
1660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1661 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1663 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1664 (__v16qi)
1665 _mm_setzero_si128 (),
1666 __M);
1669 extern __inline __m128i
1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671 _mm_cvtepi32_epi16 (__m128i __A)
1673 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1674 (__v8hi) _mm_setzero_si128 (),
1675 (__mmask8) -1);
1678 extern __inline void
1679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1680 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1682 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1685 extern __inline __m128i
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1689 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1690 (__v8hi) __O, __M);
1693 extern __inline __m128i
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1697 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1698 (__v8hi)
1699 _mm_setzero_si128 (),
1700 __M);
1703 extern __inline __m128i
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm256_cvtepi32_epi16 (__m256i __A)
1707 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1708 (__v8hi)_mm_setzero_si128 (),
1709 (__mmask8) -1);
1712 extern __inline void
1713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1716 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1719 extern __inline __m128i
1720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1721 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1723 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1724 (__v8hi) __O, __M);
1727 extern __inline __m128i
1728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1729 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1731 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1732 (__v8hi)
1733 _mm_setzero_si128 (),
1734 __M);
1737 extern __inline __m128i
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm_cvtsepi32_epi16 (__m128i __A)
1741 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1742 (__v8hi)_mm_setzero_si128 (),
1743 (__mmask8) -1);
1746 extern __inline void
1747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1750 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1753 extern __inline __m128i
1754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1757 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1758 (__v8hi)__O,
1759 __M);
1762 extern __inline __m128i
1763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1764 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1766 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1767 (__v8hi)
1768 _mm_setzero_si128 (),
1769 __M);
1772 extern __inline __m128i
1773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1774 _mm256_cvtsepi32_epi16 (__m256i __A)
1776 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1777 (__v8hi)_mm_undefined_si128(),
1778 (__mmask8) -1);
1781 extern __inline void
1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1785 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1788 extern __inline __m128i
1789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1792 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1793 (__v8hi) __O, __M);
1796 extern __inline __m128i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1800 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1801 (__v8hi)
1802 _mm_setzero_si128 (),
1803 __M);
1806 extern __inline __m128i
1807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808 _mm_cvtusepi32_epi16 (__m128i __A)
1810 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1811 (__v8hi)_mm_undefined_si128(),
1812 (__mmask8) -1);
1815 extern __inline void
1816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1819 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1822 extern __inline __m128i
1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1826 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1827 (__v8hi) __O, __M);
1830 extern __inline __m128i
1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1834 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1835 (__v8hi)
1836 _mm_setzero_si128 (),
1837 __M);
1840 extern __inline __m128i
1841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842 _mm256_cvtusepi32_epi16 (__m256i __A)
1844 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1845 (__v8hi)_mm_undefined_si128(),
1846 (__mmask8) -1);
1849 extern __inline void
1850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1853 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1856 extern __inline __m128i
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1860 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1861 (__v8hi) __O, __M);
1864 extern __inline __m128i
1865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1868 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1869 (__v8hi)
1870 _mm_setzero_si128 (),
1871 __M);
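/* Editorial usage sketch (not part of this header): the cvt/cvts/cvtus
   families above map to the VPMOVDB/VPMOVSDB/VPMOVUSDB (and ...DW) down
   conversions: plain truncation, signed saturation, and unsigned
   saturation respectively, with optional direct-to-memory forms.
   Hypothetical helpers, assuming <immintrin.h> and -mavx512vl.

     static inline void
     narrow_store_example (unsigned char *dst, __mmask8 m, __m256i v)
     {
       // Truncate eight 32-bit elements to bytes and write only the selected
       // ones; at most 8 bytes of dst are touched.
       _mm256_mask_cvtepi32_storeu_epi8 (dst, m, v);
     }

     static inline __m128i
     saturating_narrow_example (__m256i v)
     {
       return _mm256_cvtsepi32_epi8 (v);   // signed saturation to the int8 range
     }
*/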
1874 extern __inline __m128i
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_cvtepi64_epi8 (__m128i __A)
1878 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1879 (__v16qi)_mm_undefined_si128(),
1880 (__mmask8) -1);
1883 extern __inline void
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1887 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1890 extern __inline __m128i
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1894 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1895 (__v16qi) __O, __M);
1898 extern __inline __m128i
1899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1902 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1903 (__v16qi)
1904 _mm_setzero_si128 (),
1905 __M);
1908 extern __inline __m128i
1909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1910 _mm256_cvtepi64_epi8 (__m256i __A)
1912 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1913 (__v16qi)_mm_undefined_si128(),
1914 (__mmask8) -1);
1917 extern __inline void
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1921 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1924 extern __inline __m128i
1925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1926 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1928 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1929 (__v16qi) __O, __M);
1932 extern __inline __m128i
1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1936 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1937 (__v16qi)
1938 _mm_setzero_si128 (),
1939 __M);
1942 extern __inline __m128i
1943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944 _mm_cvtsepi64_epi8 (__m128i __A)
1946 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1947 (__v16qi)_mm_undefined_si128(),
1948 (__mmask8) -1);
1951 extern __inline void
1952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1955 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1958 extern __inline __m128i
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1962 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1963 (__v16qi) __O, __M);
1966 extern __inline __m128i
1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1970 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1971 (__v16qi)
1972 _mm_setzero_si128 (),
1973 __M);
1976 extern __inline __m128i
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm256_cvtsepi64_epi8 (__m256i __A)
1980 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1981 (__v16qi)_mm_undefined_si128(),
1982 (__mmask8) -1);
1985 extern __inline void
1986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1989 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1992 extern __inline __m128i
1993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1996 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1997 (__v16qi) __O, __M);
2000 extern __inline __m128i
2001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2002 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2004 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2005 (__v16qi)
2006 _mm_setzero_si128 (),
2007 __M);
2010 extern __inline __m128i
2011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012 _mm_cvtusepi64_epi8 (__m128i __A)
2014 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2015 (__v16qi)_mm_undefined_si128(),
2016 (__mmask8) -1);
2019 extern __inline void
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2026 extern __inline __m128i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2035 extern __inline __m128i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2045 extern __inline __m128i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm256_cvtusepi64_epi8 (__m256i __A)
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050 (__v16qi)_mm_undefined_si128(),
2051 (__mmask8) -1);
2054 extern __inline void
2055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2058 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2061 extern __inline __m128i
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2065 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2066 (__v16qi) __O,
2067 __M);
2070 extern __inline __m128i
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2074 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2075 (__v16qi)
2076 _mm_setzero_si128 (),
2077 __M);
2080 extern __inline __m128i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_cvtepi64_epi16 (__m128i __A)
2084 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2085 (__v8hi)_mm_undefined_si128(),
2086 (__mmask8) -1);
2089 extern __inline void
2090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2093 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2096 extern __inline __m128i
2097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2100 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2101 (__v8hi)__O,
2102 __M);
2105 extern __inline __m128i
2106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2109 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2110 (__v8hi)
2111 _mm_setzero_si128 (),
2112 __M);
2115 extern __inline __m128i
2116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2117 _mm256_cvtepi64_epi16 (__m256i __A)
2119 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2120 (__v8hi)_mm_undefined_si128(),
2121 (__mmask8) -1);
2124 extern __inline void
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2128 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2131 extern __inline __m128i
2132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2135 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2136 (__v8hi) __O, __M);
2139 extern __inline __m128i
2140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2141 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2143 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2144 (__v8hi)
2145 _mm_setzero_si128 (),
2146 __M);
2149 extern __inline __m128i
2150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2151 _mm_cvtsepi64_epi16 (__m128i __A)
2153 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2154 (__v8hi)_mm_undefined_si128(),
2155 (__mmask8) -1);
2158 extern __inline void
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2162 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2165 extern __inline __m128i
2166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2169 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2170 (__v8hi) __O, __M);
2173 extern __inline __m128i
2174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2175 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2177 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2178 (__v8hi)
2179 _mm_setzero_si128 (),
2180 __M);
2183 extern __inline __m128i
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 _mm256_cvtsepi64_epi16 (__m256i __A)
2187 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2188 (__v8hi)_mm_undefined_si128(),
2189 (__mmask8) -1);
2192 extern __inline void
2193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2196 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2199 extern __inline __m128i
2200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2201 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2203 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2204 (__v8hi) __O, __M);
2207 extern __inline __m128i
2208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2211 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2212 (__v8hi)
2213 _mm_setzero_si128 (),
2214 __M);
2217 extern __inline __m128i
2218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2219 _mm_cvtusepi64_epi16 (__m128i __A)
2221 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2222 (__v8hi)_mm_undefined_si128(),
2223 (__mmask8) -1);
2226 extern __inline void
2227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2230 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2233 extern __inline __m128i
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2237 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2238 (__v8hi) __O, __M);
2241 extern __inline __m128i
2242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2243 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2245 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2246 (__v8hi)
2247 _mm_setzero_si128 (),
2248 __M);
2251 extern __inline __m128i
2252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253 _mm256_cvtusepi64_epi16 (__m256i __A)
2255 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2256 (__v8hi)_mm_undefined_si128(),
2257 (__mmask8) -1);
2260 extern __inline void
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2264 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2267 extern __inline __m128i
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2271 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2272 (__v8hi) __O, __M);
2275 extern __inline __m128i
2276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2279 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2280 (__v8hi)
2281 _mm_setzero_si128 (),
2282 __M);
2285 extern __inline __m128i
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm_cvtepi64_epi32 (__m128i __A)
2289 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2290 (__v4si)_mm_undefined_si128(),
2291 (__mmask8) -1);
2294 extern __inline void
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2298 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2301 extern __inline __m128i
2302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2303 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2305 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2306 (__v4si) __O, __M);
2309 extern __inline __m128i
2310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si)
2315 _mm_setzero_si128 (),
2316 __M);
2319 extern __inline __m128i
2320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321 _mm256_cvtepi64_epi32 (__m256i __A)
2323 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2324 (__v4si)_mm_undefined_si128(),
2325 (__mmask8) -1);
2328 extern __inline void
2329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2332 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2335 extern __inline __m128i
2336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2339 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2340 (__v4si) __O, __M);
2343 extern __inline __m128i
2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2347 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2348 (__v4si)
2349 _mm_setzero_si128 (),
2350 __M);
2353 extern __inline __m128i
2354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355 _mm_cvtsepi64_epi32 (__m128i __A)
2357 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2358 (__v4si)_mm_undefined_si128(),
2359 (__mmask8) -1);
2362 extern __inline void
2363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2366 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2369 extern __inline __m128i
2370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2373 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2374 (__v4si) __O, __M);
2377 extern __inline __m128i
2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2381 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2382 (__v4si)
2383 _mm_setzero_si128 (),
2384 __M);
2387 extern __inline __m128i
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm256_cvtsepi64_epi32 (__m256i __A)
2391 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2392 (__v4si)_mm_undefined_si128(),
2393 (__mmask8) -1);
2396 extern __inline void
2397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2400 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2403 extern __inline __m128i
2404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2407 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2408 (__v4si)__O,
2409 __M);
2412 extern __inline __m128i
2413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2416 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2417 (__v4si)
2418 _mm_setzero_si128 (),
2419 __M);
2422 extern __inline __m128i
2423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2424 _mm_cvtusepi64_epi32 (__m128i __A)
2426 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2427 (__v4si)_mm_undefined_si128(),
2428 (__mmask8) -1);
2431 extern __inline void
2432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2433 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2435 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2438 extern __inline __m128i
2439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2443 (__v4si) __O, __M);
2446 extern __inline __m128i
2447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2448 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2450 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2451 (__v4si)
2452 _mm_setzero_si128 (),
2453 __M);
2456 extern __inline __m128i
2457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2458 _mm256_cvtusepi64_epi32 (__m256i __A)
2460 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2461 (__v4si)_mm_undefined_si128(),
2462 (__mmask8) -1);
2465 extern __inline void
2466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2467 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2469 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2472 extern __inline __m128i
2473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2476 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2477 (__v4si) __O, __M);
2480 extern __inline __m128i
2481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2482 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2484 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2485 (__v4si)
2486 _mm_setzero_si128 (),
2487 __M);
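/* Masked broadcasts of a single 32/64-bit element, either from the low
   element of a vector or from a general-purpose register (set1).  */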
2490 extern __inline __m256
2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2494 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2495 (__v8sf) __O,
2496 __M);
2499 extern __inline __m256
2500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2503 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2504 (__v8sf)
2505 _mm256_setzero_ps (),
2506 __M);
2509 extern __inline __m128
2510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2511 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2513 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2514 (__v4sf) __O,
2515 __M);
2518 extern __inline __m128
2519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2522 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2523 (__v4sf)
2524 _mm_setzero_ps (),
2525 __M);
2528 extern __inline __m256d
2529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2532 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2533 (__v4df) __O,
2534 __M);
2537 extern __inline __m256d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2541 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2542 (__v4df)
2543 _mm256_setzero_pd (),
2544 __M);
2547 extern __inline __m256i
2548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2549 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2551 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2552 (__v8si) __O,
2553 __M);
2556 extern __inline __m256i
2557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2558 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2560 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2561 (__v8si)
2562 _mm256_setzero_si256 (),
2563 __M);
2566 extern __inline __m256i
2567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2568 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2570 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2571 __M);
2574 extern __inline __m256i
2575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2576 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2578 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2579 (__v8si)
2580 _mm256_setzero_si256 (),
2581 __M);
2584 extern __inline __m128i
2585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2588 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2589 (__v4si) __O,
2590 __M);
2593 extern __inline __m128i
2594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2595 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2597 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2598 (__v4si)
2599 _mm_setzero_si128 (),
2600 __M);
2603 extern __inline __m128i
2604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2607 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2608 __M);
2611 extern __inline __m128i
2612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2613 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2615 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2616 (__v4si)
2617 _mm_setzero_si128 (),
2618 __M);
2621 extern __inline __m256i
2622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2623 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2625 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2626 (__v4di) __O,
2627 __M);
2630 extern __inline __m256i
2631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2632 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2634 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2635 (__v4di)
2636 _mm256_setzero_si256 (),
2637 __M);
2640 extern __inline __m256i
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2644 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2645 __M);
2648 extern __inline __m256i
2649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2652 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2653 (__v4di)
2654 _mm256_setzero_si256 (),
2655 __M);
2658 extern __inline __m128i
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2662 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2663 (__v2di) __O,
2664 __M);
2667 extern __inline __m128i
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2671 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2672 (__v2di)
2673 _mm_setzero_si128 (),
2674 __M);
2677 extern __inline __m128i
2678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2679 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2681 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2682 __M);
2685 extern __inline __m128i
2686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2689 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2690 (__v2di)
2691 _mm_setzero_si128 (),
2692 __M);
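/* Masked broadcasts of a whole 128-bit lane (f32x4/i32x4) into a 256-bit
   vector.  */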
2695 extern __inline __m256
2696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2697 _mm256_broadcast_f32x4 (__m128 __A)
2699 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2700 (__v8sf)_mm256_undefined_pd (),
2701 (__mmask8) -
2702 1);
2705 extern __inline __m256
2706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2709 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2710 (__v8sf) __O,
2711 __M);
2714 extern __inline __m256
2715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2716 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2718 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2719 (__v8sf)
2720 _mm256_setzero_ps (),
2721 __M);
2724 extern __inline __m256i
2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726 _mm256_broadcast_i32x4 (__m128i __A)
2728 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2729 __A,
2730 (__v8si)_mm256_undefined_si256 (),
2731 (__mmask8) -
2732 1);
2735 extern __inline __m256i
2736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2737 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2739 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2740 __A,
2741 (__v8si)
2742 __O, __M);
2745 extern __inline __m256i
2746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2749 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2750 __A,
2751 (__v8si)
2752 _mm256_setzero_si256 (),
2753 __M);
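/* Masked sign-extending element conversions (pmovsx).  */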
2756 extern __inline __m256i
2757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2760 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2761 (__v8si) __W,
2762 (__mmask8) __U);
2765 extern __inline __m256i
2766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2767 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2769 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2770 (__v8si)
2771 _mm256_setzero_si256 (),
2772 (__mmask8) __U);
2775 extern __inline __m128i
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2779 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2780 (__v4si) __W,
2781 (__mmask8) __U);
2784 extern __inline __m128i
2785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2788 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2789 (__v4si)
2790 _mm_setzero_si128 (),
2791 (__mmask8) __U);
2794 extern __inline __m256i
2795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2796 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2798 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2799 (__v4di) __W,
2800 (__mmask8) __U);
2803 extern __inline __m256i
2804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2805 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2807 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2808 (__v4di)
2809 _mm256_setzero_si256 (),
2810 (__mmask8) __U);
2813 extern __inline __m128i
2814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2815 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2817 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2818 (__v2di) __W,
2819 (__mmask8) __U);
2822 extern __inline __m128i
2823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2824 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2826 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2827 (__v2di)
2828 _mm_setzero_si128 (),
2829 (__mmask8) __U);
2832 extern __inline __m256i
2833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2836 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2837 (__v8si) __W,
2838 (__mmask8) __U);
2841 extern __inline __m256i
2842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2845 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2846 (__v8si)
2847 _mm256_setzero_si256 (),
2848 (__mmask8) __U);
2851 extern __inline __m128i
2852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2853 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2855 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2856 (__v4si) __W,
2857 (__mmask8) __U);
2860 extern __inline __m128i
2861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2862 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2864 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2865 (__v4si)
2866 _mm_setzero_si128 (),
2867 (__mmask8) __U);
2870 extern __inline __m256i
2871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2874 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2875 (__v4di) __W,
2876 (__mmask8) __U);
2879 extern __inline __m256i
2880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2881 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2883 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2884 (__v4di)
2885 _mm256_setzero_si256 (),
2886 (__mmask8) __U);
2889 extern __inline __m128i
2890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2891 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2893 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2894 (__v2di) __W,
2895 (__mmask8) __U);
2898 extern __inline __m128i
2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2902 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2903 (__v2di)
2904 _mm_setzero_si128 (),
2905 (__mmask8) __U);
2908 extern __inline __m256i
2909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2910 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2912 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2913 (__v4di) __W,
2914 (__mmask8) __U);
2917 extern __inline __m256i
2918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2921 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2922 (__v4di)
2923 _mm256_setzero_si256 (),
2924 (__mmask8) __U);
2927 extern __inline __m128i
2928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2931 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2932 (__v2di) __W,
2933 (__mmask8) __U);
2936 extern __inline __m128i
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2940 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2941 (__v2di)
2942 _mm_setzero_si128 (),
2943 (__mmask8) __U);
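/* Masked zero-extending element conversions (pmovzx).  */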
2946 extern __inline __m256i
2947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2948 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2950 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2951 (__v8si) __W,
2952 (__mmask8) __U);
2955 extern __inline __m256i
2956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2959 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2960 (__v8si)
2961 _mm256_setzero_si256 (),
2962 (__mmask8) __U);
2965 extern __inline __m128i
2966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2967 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2969 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2970 (__v4si) __W,
2971 (__mmask8) __U);
2974 extern __inline __m128i
2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2978 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2979 (__v4si)
2980 _mm_setzero_si128 (),
2981 (__mmask8) __U);
2984 extern __inline __m256i
2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2988 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2989 (__v4di) __W,
2990 (__mmask8) __U);
2993 extern __inline __m256i
2994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2997 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2998 (__v4di)
2999 _mm256_setzero_si256 (),
3000 (__mmask8) __U);
3003 extern __inline __m128i
3004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3007 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3008 (__v2di) __W,
3009 (__mmask8) __U);
3012 extern __inline __m128i
3013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3014 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3016 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3017 (__v2di)
3018 _mm_setzero_si128 (),
3019 (__mmask8) __U);
3022 extern __inline __m256i
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3026 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3027 (__v8si) __W,
3028 (__mmask8) __U);
3031 extern __inline __m256i
3032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3035 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3036 (__v8si)
3037 _mm256_setzero_si256 (),
3038 (__mmask8) __U);
3041 extern __inline __m128i
3042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3043 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3045 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3046 (__v4si) __W,
3047 (__mmask8) __U);
3050 extern __inline __m128i
3051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3052 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3054 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3055 (__v4si)
3056 _mm_setzero_si128 (),
3057 (__mmask8) __U);
3060 extern __inline __m256i
3061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3064 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3065 (__v4di) __W,
3066 (__mmask8) __U);
3069 extern __inline __m256i
3070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3071 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3073 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3074 (__v4di)
3075 _mm256_setzero_si256 (),
3076 (__mmask8) __U);
3079 extern __inline __m128i
3080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3083 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3084 (__v2di) __W,
3085 (__mmask8) __U);
3088 extern __inline __m128i
3089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3092 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3093 (__v2di)
3094 _mm_setzero_si128 (),
3095 (__mmask8) __U);
3098 extern __inline __m256i
3099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3100 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3102 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3103 (__v4di) __W,
3104 (__mmask8) __U);
3107 extern __inline __m256i
3108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3109 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3111 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3112 (__v4di)
3113 _mm256_setzero_si256 (),
3114 (__mmask8) __U);
3117 extern __inline __m128i
3118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3121 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3122 (__v2di) __W,
3123 (__mmask8) __U);
3126 extern __inline __m128i
3127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3128 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3130 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3131 (__v2di)
3132 _mm_setzero_si128 (),
3133 (__mmask8) __U);
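/* Approximate reciprocals (rcp14); the maximum relative error is less
   than 2^-14.  */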
3136 extern __inline __m256d
3137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138 _mm256_rcp14_pd (__m256d __A)
3140 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3141 (__v4df)
3142 _mm256_setzero_pd (),
3143 (__mmask8) -1);
3146 extern __inline __m256d
3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3150 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3151 (__v4df) __W,
3152 (__mmask8) __U);
3155 extern __inline __m256d
3156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3159 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3160 (__v4df)
3161 _mm256_setzero_pd (),
3162 (__mmask8) __U);
3165 extern __inline __m128d
3166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167 _mm_rcp14_pd (__m128d __A)
3169 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3170 (__v2df)
3171 _mm_setzero_pd (),
3172 (__mmask8) -1);
3175 extern __inline __m128d
3176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3179 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3180 (__v2df) __W,
3181 (__mmask8) __U);
3184 extern __inline __m128d
3185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3188 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3189 (__v2df)
3190 _mm_setzero_pd (),
3191 (__mmask8) __U);
3194 extern __inline __m256
3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196 _mm256_rcp14_ps (__m256 __A)
3198 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3199 (__v8sf)
3200 _mm256_setzero_ps (),
3201 (__mmask8) -1);
3204 extern __inline __m256
3205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3206 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3208 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3209 (__v8sf) __W,
3210 (__mmask8) __U);
3213 extern __inline __m256
3214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3215 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3217 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3218 (__v8sf)
3219 _mm256_setzero_ps (),
3220 (__mmask8) __U);
3223 extern __inline __m128
3224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225 _mm_rcp14_ps (__m128 __A)
3227 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3228 (__v4sf)
3229 _mm_setzero_ps (),
3230 (__mmask8) -1);
3233 extern __inline __m128
3234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3237 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3238 (__v4sf) __W,
3239 (__mmask8) __U);
3242 extern __inline __m128
3243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3244 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3246 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3247 (__v4sf)
3248 _mm_setzero_ps (),
3249 (__mmask8) __U);
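/* Approximate reciprocal square roots (rsqrt14); the maximum relative
   error is less than 2^-14.  */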
3252 extern __inline __m256d
3253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254 _mm256_rsqrt14_pd (__m256d __A)
3256 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3257 (__v4df)
3258 _mm256_setzero_pd (),
3259 (__mmask8) -1);
3262 extern __inline __m256d
3263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3266 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3267 (__v4df) __W,
3268 (__mmask8) __U);
3271 extern __inline __m256d
3272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3273 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3275 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3276 (__v4df)
3277 _mm256_setzero_pd (),
3278 (__mmask8) __U);
3281 extern __inline __m128d
3282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3283 _mm_rsqrt14_pd (__m128d __A)
3285 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3286 (__v2df)
3287 _mm_setzero_pd (),
3288 (__mmask8) -1);
3291 extern __inline __m128d
3292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3295 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3296 (__v2df) __W,
3297 (__mmask8) __U);
3300 extern __inline __m128d
3301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3304 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3305 (__v2df)
3306 _mm_setzero_pd (),
3307 (__mmask8) __U);
3310 extern __inline __m256
3311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312 _mm256_rsqrt14_ps (__m256 __A)
3314 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3315 (__v8sf)
3316 _mm256_setzero_ps (),
3317 (__mmask8) -1);
3320 extern __inline __m256
3321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3324 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3325 (__v8sf) __W,
3326 (__mmask8) __U);
3329 extern __inline __m256
3330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3333 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3334 (__v8sf)
3335 _mm256_setzero_ps (),
3336 (__mmask8) __U);
3339 extern __inline __m128
3340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341 _mm_rsqrt14_ps (__m128 __A)
3343 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3344 (__v4sf)
3345 _mm_setzero_ps (),
3346 (__mmask8) -1);
3349 extern __inline __m128
3350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3353 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3354 (__v4sf) __W,
3355 (__mmask8) __U);
3358 extern __inline __m128
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3362 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3363 (__v4sf)
3364 _mm_setzero_ps (),
3365 (__mmask8) __U);
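/* Masked square roots.  */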
3368 extern __inline __m256d
3369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3372 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3373 (__v4df) __W,
3374 (__mmask8) __U);
3377 extern __inline __m256d
3378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3379 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3381 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3382 (__v4df)
3383 _mm256_setzero_pd (),
3384 (__mmask8) __U);
3387 extern __inline __m128d
3388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3391 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3392 (__v2df) __W,
3393 (__mmask8) __U);
3396 extern __inline __m128d
3397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3398 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3400 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3401 (__v2df)
3402 _mm_setzero_pd (),
3403 (__mmask8) __U);
3406 extern __inline __m256
3407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3410 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3411 (__v8sf) __W,
3412 (__mmask8) __U);
3415 extern __inline __m256
3416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3419 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3420 (__v8sf)
3421 _mm256_setzero_ps (),
3422 (__mmask8) __U);
3425 extern __inline __m128
3426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3429 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3430 (__v4sf) __W,
3431 (__mmask8) __U);
3434 extern __inline __m128
3435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3436 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3438 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3439 (__v4sf)
3440 _mm_setzero_ps (),
3441 (__mmask8) __U);
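/* Masked integer addition and subtraction of 32/64-bit elements.  */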
3444 extern __inline __m256i
3445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3446 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3447 __m256i __B)
3449 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3450 (__v8si) __B,
3451 (__v8si) __W,
3452 (__mmask8) __U);
3455 extern __inline __m256i
3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3459 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3460 (__v8si) __B,
3461 (__v8si)
3462 _mm256_setzero_si256 (),
3463 (__mmask8) __U);
3466 extern __inline __m256i
3467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3469 __m256i __B)
3471 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3472 (__v4di) __B,
3473 (__v4di) __W,
3474 (__mmask8) __U);
3477 extern __inline __m256i
3478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3481 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3482 (__v4di) __B,
3483 (__v4di)
3484 _mm256_setzero_si256 (),
3485 (__mmask8) __U);
3488 extern __inline __m256i
3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3491 __m256i __B)
3493 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3494 (__v8si) __B,
3495 (__v8si) __W,
3496 (__mmask8) __U);
3499 extern __inline __m256i
3500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3503 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3504 (__v8si) __B,
3505 (__v8si)
3506 _mm256_setzero_si256 (),
3507 (__mmask8) __U);
3510 extern __inline __m256i
3511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3513 __m256i __B)
3515 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3516 (__v4di) __B,
3517 (__v4di) __W,
3518 (__mmask8) __U);
3521 extern __inline __m256i
3522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3525 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3526 (__v4di) __B,
3527 (__v4di)
3528 _mm256_setzero_si256 (),
3529 (__mmask8) __U);
3532 extern __inline __m128i
3533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3535 __m128i __B)
3537 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3538 (__v4si) __B,
3539 (__v4si) __W,
3540 (__mmask8) __U);
3543 extern __inline __m128i
3544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3547 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3548 (__v4si) __B,
3549 (__v4si)
3550 _mm_setzero_si128 (),
3551 (__mmask8) __U);
3554 extern __inline __m128i
3555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3557 __m128i __B)
3559 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3560 (__v2di) __B,
3561 (__v2di) __W,
3562 (__mmask8) __U);
3565 extern __inline __m128i
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3569 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3570 (__v2di) __B,
3571 (__v2di)
3572 _mm_setzero_si128 (),
3573 (__mmask8) __U);
3576 extern __inline __m128i
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3579 __m128i __B)
3581 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3582 (__v4si) __B,
3583 (__v4si) __W,
3584 (__mmask8) __U);
3587 extern __inline __m128i
3588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3591 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3592 (__v4si) __B,
3593 (__v4si)
3594 _mm_setzero_si128 (),
3595 (__mmask8) __U);
3598 extern __inline __m128i
3599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3601 __m128i __B)
3603 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3604 (__v2di) __B,
3605 (__v2di) __W,
3606 (__mmask8) __U);
3609 extern __inline __m128i
3610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3613 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3614 (__v2di) __B,
3615 (__v2di)
3616 _mm_setzero_si128 (),
3617 (__mmask8) __U);
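/* Exponent extraction (getexp): for normal inputs the result is
   floor(log2(|x|)) returned as a floating-point value.  */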
3620 extern __inline __m256
3621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 _mm256_getexp_ps (__m256 __A)
3624 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3625 (__v8sf)
3626 _mm256_setzero_ps (),
3627 (__mmask8) -1);
3630 extern __inline __m256
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3634 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3635 (__v8sf) __W,
3636 (__mmask8) __U);
3639 extern __inline __m256
3640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3643 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3644 (__v8sf)
3645 _mm256_setzero_ps (),
3646 (__mmask8) __U);
3649 extern __inline __m256d
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm256_getexp_pd (__m256d __A)
3653 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3654 (__v4df)
3655 _mm256_setzero_pd (),
3656 (__mmask8) -1);
3659 extern __inline __m256d
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3663 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3664 (__v4df) __W,
3665 (__mmask8) __U);
3668 extern __inline __m256d
3669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3672 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3673 (__v4df)
3674 _mm256_setzero_pd (),
3675 (__mmask8) __U);
3678 extern __inline __m128
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm_getexp_ps (__m128 __A)
3682 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3683 (__v4sf)
3684 _mm_setzero_ps (),
3685 (__mmask8) -1);
3688 extern __inline __m128
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3692 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3693 (__v4sf) __W,
3694 (__mmask8) __U);
3697 extern __inline __m128
3698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3701 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3702 (__v4sf)
3703 _mm_setzero_ps (),
3704 (__mmask8) __U);
3707 extern __inline __m128d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm_getexp_pd (__m128d __A)
3711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3712 (__v2df)
3713 _mm_setzero_pd (),
3714 (__mmask8) -1);
3717 extern __inline __m128d
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3721 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3722 (__v2df) __W,
3723 (__mmask8) __U);
3726 extern __inline __m128d
3727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3730 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3731 (__v2df)
3732 _mm_setzero_pd (),
3733 (__mmask8) __U);
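/* Masked logical right shifts; the shift count is taken from the low
   64 bits of __B.  */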
3736 extern __inline __m256i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3739 __m128i __B)
3741 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3742 (__v4si) __B,
3743 (__v8si) __W,
3744 (__mmask8) __U);
3747 extern __inline __m256i
3748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3751 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3752 (__v4si) __B,
3753 (__v8si)
3754 _mm256_setzero_si256 (),
3755 (__mmask8) __U);
3758 extern __inline __m128i
3759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3761 __m128i __B)
3763 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3764 (__v4si) __B,
3765 (__v4si) __W,
3766 (__mmask8) __U);
3769 extern __inline __m128i
3770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3773 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3774 (__v4si) __B,
3775 (__v4si)
3776 _mm_setzero_si128 (),
3777 (__mmask8) __U);
3780 extern __inline __m256i
3781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3783 __m128i __B)
3785 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3786 (__v2di) __B,
3787 (__v4di) __W,
3788 (__mmask8) __U);
3791 extern __inline __m256i
3792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3795 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3796 (__v2di) __B,
3797 (__v4di)
3798 _mm256_setzero_si256 (),
3799 (__mmask8) __U);
3802 extern __inline __m128i
3803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3805 __m128i __B)
3807 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3808 (__v2di) __B,
3809 (__v2di) __W,
3810 (__mmask8) __U);
3813 extern __inline __m128i
3814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3817 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3818 (__v2di) __B,
3819 (__v2di)
3820 _mm_setzero_di (),
3821 (__mmask8) __U);
3824 extern __inline __m256i
3825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3827 __m256i __B)
3829 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3830 (__v8si) __B,
3831 (__v8si) __W,
3832 (__mmask8) __U);
3835 extern __inline __m256i
3836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3839 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3840 (__v8si) __B,
3841 (__v8si)
3842 _mm256_setzero_si256 (),
3843 (__mmask8) __U);
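/* VSCALEF: each element of __A is multiplied by two raised to the
   floor of the corresponding element of __B (x * 2^floor (y)), with
   the usual merge (__W) and zero (maskz) masking variants.
   Illustrative sketch with hypothetical variables, not part of this
   header:

     __m256d r = _mm256_scalef_pd (x, _mm256_set1_pd (3.0));

   each lane of r is the matching lane of x times 8.0.  */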
3846 extern __inline __m256d
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm256_scalef_pd (__m256d __A, __m256d __B)
3850 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3851 (__v4df) __B,
3852 (__v4df)
3853 _mm256_setzero_pd (),
3854 (__mmask8) -1);
3857 extern __inline __m256d
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3860 __m256d __B)
3862 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3863 (__v4df) __B,
3864 (__v4df) __W,
3865 (__mmask8) __U);
3868 extern __inline __m256d
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3872 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3873 (__v4df) __B,
3874 (__v4df)
3875 _mm256_setzero_pd (),
3876 (__mmask8) __U);
3879 extern __inline __m256
3880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881 _mm256_scalef_ps (__m256 __A, __m256 __B)
3883 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3884 (__v8sf) __B,
3885 (__v8sf)
3886 _mm256_setzero_ps (),
3887 (__mmask8) -1);
3890 extern __inline __m256
3891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3893 __m256 __B)
3895 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3896 (__v8sf) __B,
3897 (__v8sf) __W,
3898 (__mmask8) __U);
3901 extern __inline __m256
3902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3905 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3906 (__v8sf) __B,
3907 (__v8sf)
3908 _mm256_setzero_ps (),
3909 (__mmask8) __U);
3912 extern __inline __m128d
3913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914 _mm_scalef_pd (__m128d __A, __m128d __B)
3916 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3917 (__v2df) __B,
3918 (__v2df)
3919 _mm_setzero_pd (),
3920 (__mmask8) -1);
3923 extern __inline __m128d
3924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3926 __m128d __B)
3928 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3929 (__v2df) __B,
3930 (__v2df) __W,
3931 (__mmask8) __U);
3934 extern __inline __m128d
3935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3938 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3939 (__v2df) __B,
3940 (__v2df)
3941 _mm_setzero_pd (),
3942 (__mmask8) __U);
3945 extern __inline __m128
3946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947 _mm_scalef_ps (__m128 __A, __m128 __B)
3949 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3950 (__v4sf) __B,
3951 (__v4sf)
3952 _mm_setzero_ps (),
3953 (__mmask8) -1);
3956 extern __inline __m128
3957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf) __W,
3963 (__mmask8) __U);
3966 extern __inline __m128
3967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3968 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3970 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3971 (__v4sf) __B,
3972 (__v4sf)
3973 _mm_setzero_ps (),
3974 (__mmask8) __U);
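/* Masked fused multiply-add.  The _mask forms blend the result with
   the first operand __A, the _mask3 forms blend with the third
   operand __C, and the _maskz forms zero the unselected lanes.
   Illustrative sketch with hypothetical variables, not part of this
   header:

     __m256d r = _mm256_mask_fmadd_pd (a, 0x5, b, c);

   lanes 0 and 2 of r hold a*b + c, lanes 1 and 3 are copied from a.  */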
3977 extern __inline __m256d
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3980 __m256d __C)
3982 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3983 (__v4df) __B,
3984 (__v4df) __C,
3985 (__mmask8) __U);
3988 extern __inline __m256d
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
3991 __mmask8 __U)
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3999 extern __inline __m256d
4000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4002 __m256d __C)
4004 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4010 extern __inline __m128d
4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4014 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4015 (__v2df) __B,
4016 (__v2df) __C,
4017 (__mmask8) __U);
4020 extern __inline __m128d
4021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4022 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4023 __mmask8 __U)
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4031 extern __inline __m128d
4032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4034 __m128d __C)
4036 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4042 extern __inline __m256
4043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4046 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4047 (__v8sf) __B,
4048 (__v8sf) __C,
4049 (__mmask8) __U);
4052 extern __inline __m256
4053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4054 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4055 __mmask8 __U)
4057 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4063 extern __inline __m256
4064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4066 __m256 __C)
4068 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4074 extern __inline __m128
4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4078 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4079 (__v4sf) __B,
4080 (__v4sf) __C,
4081 (__mmask8) __U);
4084 extern __inline __m128
4085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4088 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4089 (__v4sf) __B,
4090 (__v4sf) __C,
4091 (__mmask8) __U);
4094 extern __inline __m128
4095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4098 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4099 (__v4sf) __B,
4100 (__v4sf) __C,
4101 (__mmask8) __U);
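/* Fused multiply-subtract (A*B - C).  The _mask and _maskz forms are
   expressed through the fmadd builtins with __C negated; the _mask3
   forms use dedicated vfmsub builtins so the unmodified __C can be
   blended back into the unselected lanes.  */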
4104 extern __inline __m256d
4105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4106 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4107 __m256d __C)
4109 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4110 (__v4df) __B,
4111 -(__v4df) __C,
4112 (__mmask8) __U);
4115 extern __inline __m256d
4116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4118 __mmask8 __U)
4120 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4121 (__v4df) __B,
4122 (__v4df) __C,
4123 (__mmask8) __U);
4126 extern __inline __m256d
4127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4129 __m256d __C)
4131 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4132 (__v4df) __B,
4133 -(__v4df) __C,
4134 (__mmask8) __U);
4137 extern __inline __m128d
4138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4141 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4142 (__v2df) __B,
4143 -(__v2df) __C,
4144 (__mmask8) __U);
4147 extern __inline __m128d
4148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4149 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4150 __mmask8 __U)
4152 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4153 (__v2df) __B,
4154 (__v2df) __C,
4155 (__mmask8) __U);
4158 extern __inline __m128d
4159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4161 __m128d __C)
4163 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4164 (__v2df) __B,
4165 -(__v2df) __C,
4166 (__mmask8) __U);
4169 extern __inline __m256
4170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4173 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4174 (__v8sf) __B,
4175 -(__v8sf) __C,
4176 (__mmask8) __U);
4179 extern __inline __m256
4180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4181 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4182 __mmask8 __U)
4184 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4185 (__v8sf) __B,
4186 (__v8sf) __C,
4187 (__mmask8) __U);
4190 extern __inline __m256
4191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4193 __m256 __C)
4195 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4196 (__v8sf) __B,
4197 -(__v8sf) __C,
4198 (__mmask8) __U);
4201 extern __inline __m128
4202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4205 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4206 (__v4sf) __B,
4207 -(__v4sf) __C,
4208 (__mmask8) __U);
4211 extern __inline __m128
4212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4215 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4216 (__v4sf) __B,
4217 (__v4sf) __C,
4218 (__mmask8) __U);
4221 extern __inline __m128
4222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4223 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4225 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4226 (__v4sf) __B,
4227 -(__v4sf) __C,
4228 (__mmask8) __U);
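/* Alternating fused operations.  fmaddsub subtracts __C in the
   even-indexed lanes and adds it in the odd-indexed lanes; fmsubadd
   (further below) does the opposite.  Masking follows the same
   _mask/_mask3/_maskz conventions as fmadd above.  */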
4231 extern __inline __m256d
4232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4234 __m256d __C)
4236 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4237 (__v4df) __B,
4238 (__v4df) __C,
4239 (__mmask8) __U);
4242 extern __inline __m256d
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4245 __mmask8 __U)
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8)
4251 __U);
4254 extern __inline __m256d
4255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4256 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4257 __m256d __C)
4259 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4260 (__v4df) __B,
4261 (__v4df) __C,
4262 (__mmask8)
4263 __U);
4266 extern __inline __m128d
4267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4269 __m128d __C)
4271 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4272 (__v2df) __B,
4273 (__v2df) __C,
4274 (__mmask8) __U);
4277 extern __inline __m128d
4278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4280 __mmask8 __U)
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8)
4286 __U);
4289 extern __inline __m128d
4290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4291 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4292 __m128d __C)
4294 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4295 (__v2df) __B,
4296 (__v2df) __C,
4297 (__mmask8)
4298 __U);
4301 extern __inline __m256
4302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4303 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4304 __m256 __C)
4306 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4307 (__v8sf) __B,
4308 (__v8sf) __C,
4309 (__mmask8) __U);
4312 extern __inline __m256
4313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4315 __mmask8 __U)
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4323 extern __inline __m256
4324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4326 __m256 __C)
4328 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4334 extern __inline __m128
4335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4338 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4339 (__v4sf) __B,
4340 (__v4sf) __C,
4341 (__mmask8) __U);
4344 extern __inline __m128
4345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4346 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4347 __mmask8 __U)
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4355 extern __inline __m128
4356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4358 __m128 __C)
4360 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4366 extern __inline __m256d
4367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4369 __m256d __C)
4371 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4372 (__v4df) __B,
4373 -(__v4df) __C,
4374 (__mmask8) __U);
4377 extern __inline __m256d
4378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4380 __mmask8 __U)
4382 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4383 (__v4df) __B,
4384 (__v4df) __C,
4385 (__mmask8)
4386 __U);
4389 extern __inline __m256d
4390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4391 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4392 __m256d __C)
4394 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4395 (__v4df) __B,
4396 -(__v4df) __C,
4397 (__mmask8)
4398 __U);
4401 extern __inline __m128d
4402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4403 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4404 __m128d __C)
4406 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4407 (__v2df) __B,
4408 -(__v2df) __C,
4409 (__mmask8) __U);
4412 extern __inline __m128d
4413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4415 __mmask8 __U)
4417 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4418 (__v2df) __B,
4419 (__v2df) __C,
4420 (__mmask8)
4421 __U);
4424 extern __inline __m128d
4425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4426 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4427 __m128d __C)
4429 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4430 (__v2df) __B,
4431 -(__v2df) __C,
4432 (__mmask8)
4433 __U);
4436 extern __inline __m256
4437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4439 __m256 __C)
4441 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4442 (__v8sf) __B,
4443 -(__v8sf) __C,
4444 (__mmask8) __U);
4447 extern __inline __m256
4448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4450 __mmask8 __U)
4452 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4453 (__v8sf) __B,
4454 (__v8sf) __C,
4455 (__mmask8) __U);
4458 extern __inline __m256
4459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4461 __m256 __C)
4463 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4464 (__v8sf) __B,
4465 -(__v8sf) __C,
4466 (__mmask8) __U);
4469 extern __inline __m128
4470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4473 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4474 (__v4sf) __B,
4475 -(__v4sf) __C,
4476 (__mmask8) __U);
4479 extern __inline __m128
4480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4481 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4482 __mmask8 __U)
4484 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4485 (__v4sf) __B,
4486 (__v4sf) __C,
4487 (__mmask8) __U);
4490 extern __inline __m128
4491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4493 __m128 __C)
4495 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4496 (__v4sf) __B,
4497 -(__v4sf) __C,
4498 (__mmask8) __U);
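/* Negated fused forms: fnmadd computes -(A*B) + C and fnmsub computes
   -(A*B) - C, obtained by negating __A (and __C for fnmsub) before
   the fused builtin wherever no dedicated vfnmadd/vfnmsub builtin is
   used.  */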
4501 extern __inline __m256d
4502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4504 __m256d __C)
4506 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4507 (__v4df) __B,
4508 (__v4df) __C,
4509 (__mmask8) __U);
4512 extern __inline __m256d
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4515 __mmask8 __U)
4517 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4523 extern __inline __m256d
4524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4526 __m256d __C)
4528 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
4534 extern __inline __m128d
4535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4537 __m128d __C)
4539 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4540 (__v2df) __B,
4541 (__v2df) __C,
4542 (__mmask8) __U);
4545 extern __inline __m128d
4546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4548 __mmask8 __U)
4550 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4556 extern __inline __m128d
4557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4559 __m128d __C)
4561 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
4567 extern __inline __m256
4568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4570 __m256 __C)
4572 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4573 (__v8sf) __B,
4574 (__v8sf) __C,
4575 (__mmask8) __U);
4578 extern __inline __m256
4579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4581 __mmask8 __U)
4583 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4589 extern __inline __m256
4590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4592 __m256 __C)
4594 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
4600 extern __inline __m128
4601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4604 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4605 (__v4sf) __B,
4606 (__v4sf) __C,
4607 (__mmask8) __U);
4610 extern __inline __m128
4611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4612 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4614 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4615 (__v4sf) __B,
4616 (__v4sf) __C,
4617 (__mmask8) __U);
4620 extern __inline __m128
4621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4622 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4624 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4625 (__v4sf) __B,
4626 (__v4sf) __C,
4627 (__mmask8) __U);
4630 extern __inline __m256d
4631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4632 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4633 __m256d __C)
4635 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4636 (__v4df) __B,
4637 (__v4df) __C,
4638 (__mmask8) __U);
4641 extern __inline __m256d
4642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4644 __mmask8 __U)
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4652 extern __inline __m256d
4653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4655 __m256d __C)
4657 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4658 (__v4df) __B,
4659 -(__v4df) __C,
4660 (__mmask8) __U);
4663 extern __inline __m128d
4664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4666 __m128d __C)
4668 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4669 (__v2df) __B,
4670 (__v2df) __C,
4671 (__mmask8) __U);
4674 extern __inline __m128d
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4677 __mmask8 __U)
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4685 extern __inline __m128d
4686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4688 __m128d __C)
4690 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4691 (__v2df) __B,
4692 -(__v2df) __C,
4693 (__mmask8) __U);
4696 extern __inline __m256
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4699 __m256 __C)
4701 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4702 (__v8sf) __B,
4703 (__v8sf) __C,
4704 (__mmask8) __U);
4707 extern __inline __m256
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4710 __mmask8 __U)
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4718 extern __inline __m256
4719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4721 __m256 __C)
4723 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4724 (__v8sf) __B,
4725 -(__v8sf) __C,
4726 (__mmask8) __U);
4729 extern __inline __m128
4730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4733 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4734 (__v4sf) __B,
4735 (__v4sf) __C,
4736 (__mmask8) __U);
4739 extern __inline __m128
4740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4741 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4743 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4744 (__v4sf) __B,
4745 (__v4sf) __C,
4746 (__mmask8) __U);
4749 extern __inline __m128
4750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4751 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4753 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4754 (__v4sf) __B,
4755 -(__v4sf) __C,
4756 (__mmask8) __U);
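/* Masked bitwise operations on 32-bit elements: and, andnot
   ((~__A) & __B), or and xor, each with merge (__W) and zero (maskz)
   variants.  Illustrative sketch with hypothetical variables, not
   part of this header:

     __m128i r = _mm_maskz_and_epi32 (0x3, a, b);

   lanes 0 and 1 of r hold a & b, lanes 2 and 3 are zero.  */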
4759 extern __inline __m128i
4760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4761 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4762 __m128i __B)
4764 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4765 (__v4si) __B,
4766 (__v4si) __W,
4767 (__mmask8) __U);
4770 extern __inline __m128i
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4774 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4775 (__v4si) __B,
4776 (__v4si)
4777 _mm_setzero_si128 (),
4778 (__mmask8) __U);
4781 extern __inline __m256i
4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4784 __m256i __B)
4786 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4787 (__v8si) __B,
4788 (__v8si) __W,
4789 (__mmask8) __U);
4792 extern __inline __m256i
4793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4796 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4797 (__v8si) __B,
4798 (__v8si)
4799 _mm256_setzero_si256 (),
4800 (__mmask8) __U);
4803 extern __inline __m128i
4804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4806 __m128i __B)
4808 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4809 (__v4si) __B,
4810 (__v4si) __W,
4811 (__mmask8) __U);
4814 extern __inline __m128i
4815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4818 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4819 (__v4si) __B,
4820 (__v4si)
4821 _mm_setzero_si128 (),
4822 (__mmask8) __U);
4825 extern __inline __m256i
4826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4828 __m256i __B)
4830 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4831 (__v8si) __B,
4832 (__v8si) __W,
4833 (__mmask8) __U);
4836 extern __inline __m256i
4837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4840 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4841 (__v8si) __B,
4842 (__v8si)
4843 _mm256_setzero_si256 (),
4844 (__mmask8) __U);
4847 extern __inline __m128i
4848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4851 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4852 (__v4si) __B,
4853 (__v4si) __W,
4854 (__mmask8) __U);
4857 extern __inline __m128i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4861 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4862 (__v4si) __B,
4863 (__v4si)
4864 _mm_setzero_si128 (),
4865 (__mmask8) __U);
4868 extern __inline __m256i
4869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4871 __m256i __B)
4873 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4874 (__v8si) __B,
4875 (__v8si) __W,
4876 (__mmask8) __U);
4879 extern __inline __m256i
4880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4883 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4884 (__v8si) __B,
4885 (__v8si)
4886 _mm256_setzero_si256 (),
4887 (__mmask8) __U);
4890 extern __inline __m128i
4891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4892 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4893 __m128i __B)
4895 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4896 (__v4si) __B,
4897 (__v4si) __W,
4898 (__mmask8) __U);
4901 extern __inline __m128i
4902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4905 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4906 (__v4si) __B,
4907 (__v4si)
4908 _mm_setzero_si128 (),
4909 (__mmask8) __U);
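/* Masked conversions.  cvtpd_ps narrows double to single precision
   (the unused upper lanes of the __m128 result are zeroed);
   cvtps_epi32 and cvtps_epu32 convert to signed and unsigned 32-bit
   integers using the current rounding mode.  */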
4912 extern __inline __m128
4913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4916 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4917 (__v4sf) __W,
4918 (__mmask8) __U);
4921 extern __inline __m128
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4925 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4926 (__v4sf)
4927 _mm_setzero_ps (),
4928 (__mmask8) __U);
4931 extern __inline __m128
4932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4933 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4935 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4936 (__v4sf) __W,
4937 (__mmask8) __U);
4940 extern __inline __m128
4941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4942 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4944 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4945 (__v4sf)
4946 _mm_setzero_ps (),
4947 (__mmask8) __U);
4950 extern __inline __m256i
4951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4952 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4954 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4955 (__v8si) __W,
4956 (__mmask8) __U);
4959 extern __inline __m256i
4960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4961 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4963 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4964 (__v8si)
4965 _mm256_setzero_si256 (),
4966 (__mmask8) __U);
4969 extern __inline __m128i
4970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4971 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4973 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4974 (__v4si) __W,
4975 (__mmask8) __U);
4978 extern __inline __m128i
4979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
4982 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4983 (__v4si)
4984 _mm_setzero_si128 (),
4985 (__mmask8) __U);
4988 extern __inline __m256i
4989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4990 _mm256_cvtps_epu32 (__m256 __A)
4992 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
4993 (__v8si)
4994 _mm256_setzero_si256 (),
4995 (__mmask8) -1);
4998 extern __inline __m256i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5002 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5003 (__v8si) __W,
5004 (__mmask8) __U);
5007 extern __inline __m256i
5008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5011 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5012 (__v8si)
5013 _mm256_setzero_si256 (),
5014 (__mmask8) __U);
5017 extern __inline __m128i
5018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5019 _mm_cvtps_epu32 (__m128 __A)
5021 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5022 (__v4si)
5023 _mm_setzero_si128 (),
5024 (__mmask8) -1);
5027 extern __inline __m128i
5028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5031 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5032 (__v4si) __W,
5033 (__mmask8) __U);
5036 extern __inline __m128i
5037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5038 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5040 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5041 (__v4si)
5042 _mm_setzero_si128 (),
5043 (__mmask8) __U);
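/* Masked element duplication: movedup copies the even-indexed double
   elements into each pair, movehdup copies the odd-indexed single
   elements and moveldup the even-indexed ones.  */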
5046 extern __inline __m256d
5047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5050 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5051 (__v4df) __W,
5052 (__mmask8) __U);
5055 extern __inline __m256d
5056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5057 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5059 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5060 (__v4df)
5061 _mm256_setzero_pd (),
5062 (__mmask8) __U);
5065 extern __inline __m128d
5066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5067 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5069 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5070 (__v2df) __W,
5071 (__mmask8) __U);
5074 extern __inline __m128d
5075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5078 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5079 (__v2df)
5080 _mm_setzero_pd (),
5081 (__mmask8) __U);
5084 extern __inline __m256
5085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5088 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5089 (__v8sf) __W,
5090 (__mmask8) __U);
5093 extern __inline __m256
5094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5095 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5097 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5098 (__v8sf)
5099 _mm256_setzero_ps (),
5100 (__mmask8) __U);
5103 extern __inline __m128
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5107 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5108 (__v4sf) __W,
5109 (__mmask8) __U);
5112 extern __inline __m128
5113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5116 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5117 (__v4sf)
5118 _mm_setzero_ps (),
5119 (__mmask8) __U);
5122 extern __inline __m256
5123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5126 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5127 (__v8sf) __W,
5128 (__mmask8) __U);
5131 extern __inline __m256
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5135 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5136 (__v8sf)
5137 _mm256_setzero_ps (),
5138 (__mmask8) __U);
5141 extern __inline __m128
5142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5143 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5145 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5146 (__v4sf) __W,
5147 (__mmask8) __U);
5150 extern __inline __m128
5151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5152 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5154 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5155 (__v4sf)
5156 _mm_setzero_ps (),
5157 (__mmask8) __U);
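/* Masked interleaves.  unpackhi/unpacklo interleave the high or low
   halves of __A and __B; the 256-bit forms operate independently
   within each 128-bit lane, as in AVX2.  */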
5160 extern __inline __m128i
5161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5163 __m128i __B)
5165 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5166 (__v4si) __B,
5167 (__v4si) __W,
5168 (__mmask8) __U);
5171 extern __inline __m128i
5172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5175 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5176 (__v4si) __B,
5177 (__v4si)
5178 _mm_setzero_si128 (),
5179 (__mmask8) __U);
5182 extern __inline __m256i
5183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5185 __m256i __B)
5187 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5188 (__v8si) __B,
5189 (__v8si) __W,
5190 (__mmask8) __U);
5193 extern __inline __m256i
5194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5197 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5198 (__v8si) __B,
5199 (__v8si)
5200 _mm256_setzero_si256 (),
5201 (__mmask8) __U);
5204 extern __inline __m128i
5205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5207 __m128i __B)
5209 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5210 (__v2di) __B,
5211 (__v2di) __W,
5212 (__mmask8) __U);
5215 extern __inline __m128i
5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5219 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5220 (__v2di) __B,
5221 (__v2di)
5222 _mm_setzero_di (),
5223 (__mmask8) __U);
5226 extern __inline __m256i
5227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5228 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5229 __m256i __B)
5231 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5232 (__v4di) __B,
5233 (__v4di) __W,
5234 (__mmask8) __U);
5237 extern __inline __m256i
5238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5241 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5242 (__v4di) __B,
5243 (__v4di)
5244 _mm256_setzero_si256 (),
5245 (__mmask8) __U);
5248 extern __inline __m128i
5249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5251 __m128i __B)
5253 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5254 (__v4si) __B,
5255 (__v4si) __W,
5256 (__mmask8) __U);
5259 extern __inline __m128i
5260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5261 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5263 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5264 (__v4si) __B,
5265 (__v4si)
5266 _mm_setzero_si128 (),
5267 (__mmask8) __U);
5270 extern __inline __m256i
5271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5272 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5273 __m256i __B)
5275 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5276 (__v8si) __B,
5277 (__v8si) __W,
5278 (__mmask8) __U);
5281 extern __inline __m256i
5282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5285 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5286 (__v8si) __B,
5287 (__v8si)
5288 _mm256_setzero_si256 (),
5289 (__mmask8) __U);
5292 extern __inline __m128i
5293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5295 __m128i __B)
5297 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5298 (__v2di) __B,
5299 (__v2di) __W,
5300 (__mmask8) __U);
5303 extern __inline __m128i
5304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5307 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5308 (__v2di) __B,
5309 (__v2di)
5310 _mm_setzero_di (),
5311 (__mmask8) __U);
5314 extern __inline __m256i
5315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5316 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5317 __m256i __B)
5319 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5320 (__v4di) __B,
5321 (__v4di) __W,
5322 (__mmask8) __U);
5325 extern __inline __m256i
5326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5329 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5330 (__v4di) __B,
5331 (__v4di)
5332 _mm256_setzero_si256 (),
5333 (__mmask8) __U);
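/* Mask-producing comparisons.  Bit i of the returned __mmask8 is set
   when the comparison holds for element i; the unsigned forms go
   through the generic ucmp builtins with an immediate predicate
   (0 for equal, 6 for greater-than).  Illustrative sketch with
   hypothetical variables, not part of this header:

     __mmask8 m = _mm_cmpgt_epu32_mask (a, b);

   bit i of m is set when a[i] > b[i] as unsigned 32-bit values.  */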
5336 extern __inline __mmask8
5337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5340 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5341 (__v4si) __B, 0,
5342 (__mmask8) -1);
5345 extern __inline __mmask8
5346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5347 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5349 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5350 (__v4si) __B,
5351 (__mmask8) -1);
5354 extern __inline __mmask8
5355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5358 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5359 (__v4si) __B, 0, __U);
5362 extern __inline __mmask8
5363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5364 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5366 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5367 (__v4si) __B, __U);
5370 extern __inline __mmask8
5371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5374 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5375 (__v8si) __B, 0,
5376 (__mmask8) -1);
5379 extern __inline __mmask8
5380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5383 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5384 (__v8si) __B,
5385 (__mmask8) -1);
5388 extern __inline __mmask8
5389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5392 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5393 (__v8si) __B, 0, __U);
5396 extern __inline __mmask8
5397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5400 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5401 (__v8si) __B, __U);
5404 extern __inline __mmask8
5405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5406 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5408 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5409 (__v2di) __B, 0,
5410 (__mmask8) -1);
5413 extern __inline __mmask8
5414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5417 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5418 (__v2di) __B,
5419 (__mmask8) -1);
5422 extern __inline __mmask8
5423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5424 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5426 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5427 (__v2di) __B, 0, __U);
5430 extern __inline __mmask8
5431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5432 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5434 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5435 (__v2di) __B, __U);
5438 extern __inline __mmask8
5439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5440 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5442 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5443 (__v4di) __B, 0,
5444 (__mmask8) -1);
5447 extern __inline __mmask8
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5451 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5452 (__v4di) __B,
5453 (__mmask8) -1);
5456 extern __inline __mmask8
5457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5458 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5460 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5461 (__v4di) __B, 0, __U);
5464 extern __inline __mmask8
5465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5468 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5469 (__v4di) __B, __U);
5472 extern __inline __mmask8
5473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5476 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5477 (__v4si) __B, 6,
5478 (__mmask8) -1);
5481 extern __inline __mmask8
5482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5485 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5486 (__v4si) __B,
5487 (__mmask8) -1);
5490 extern __inline __mmask8
5491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5492 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5494 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5495 (__v4si) __B, 6, __U);
5498 extern __inline __mmask8
5499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5500 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5502 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5503 (__v4si) __B, __U);
5506 extern __inline __mmask8
5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5510 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5511 (__v8si) __B, 6,
5512 (__mmask8) -1);
5515 extern __inline __mmask8
5516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5517 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5519 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5520 (__v8si) __B,
5521 (__mmask8) -1);
5524 extern __inline __mmask8
5525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5526 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5528 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5529 (__v8si) __B, 6, __U);
5532 extern __inline __mmask8
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5536 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5537 (__v8si) __B, __U);
5540 extern __inline __mmask8
5541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5544 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5545 (__v2di) __B, 6,
5546 (__mmask8) -1);
5549 extern __inline __mmask8
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5553 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5554 (__v2di) __B,
5555 (__mmask8) -1);
5558 extern __inline __mmask8
5559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5560 _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5562 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5563 (__v2di) __B, 6, __U);
5566 extern __inline __mmask8
5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5570 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5571 (__v2di) __B, __U);
5574 extern __inline __mmask8
5575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5576 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5578 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5579 (__v4di) __B, 6,
5580 (__mmask8) -1);
5583 extern __inline __mmask8
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5587 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5588 (__v4di) __B,
5589 (__mmask8) -1);
5592 extern __inline __mmask8
5593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5596 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5597 (__v4di) __B, 6, __U);
5600 extern __inline __mmask8
5601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5604 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5605 (__v4di) __B, __U);
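/* VPTESTM/VPTESTNM: bit i of the result is set when (__A[i] & __B[i])
   is nonzero (test) or zero (testn), further ANDed with the incoming
   mask __U in the _mask_ variants.  */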
5608 extern __inline __mmask8
5609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5612 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5613 (__v4si) __B,
5614 (__mmask8) -1);
5617 extern __inline __mmask8
5618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5619 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5621 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5622 (__v4si) __B, __U);
5625 extern __inline __mmask8
5626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5629 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5630 (__v8si) __B,
5631 (__mmask8) -1);
5634 extern __inline __mmask8
5635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5636 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5638 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5639 (__v8si) __B, __U);
5642 extern __inline __mmask8
5643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5644 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5646 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5647 (__v2di) __B,
5648 (__mmask8) -1);
5651 extern __inline __mmask8
5652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5653 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5655 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5656 (__v2di) __B, __U);
5659 extern __inline __mmask8
5660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5663 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5664 (__v4di) __B,
5665 (__mmask8) -1);
5668 extern __inline __mmask8
5669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5670 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5672 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5673 (__v4di) __B, __U);
5676 extern __inline __mmask8
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5680 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5681 (__v4si) __B,
5682 (__mmask8) -1);
5685 extern __inline __mmask8
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5689 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5690 (__v4si) __B, __U);
5693 extern __inline __mmask8
5694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5695 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5697 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5698 (__v8si) __B,
5699 (__mmask8) -1);
5702 extern __inline __mmask8
5703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5704 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5706 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5707 (__v8si) __B, __U);
5710 extern __inline __mmask8
5711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5714 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5715 (__v2di) __B,
5716 (__mmask8) -1);
5719 extern __inline __mmask8
5720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5723 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5724 (__v2di) __B, __U);
5727 extern __inline __mmask8
5728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5731 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5732 (__v4di) __B,
5733 (__mmask8) -1);
5736 extern __inline __mmask8
5737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5738 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5740 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5741 (__v4di) __B, __U);
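/* Masked compression.  compress packs the elements selected by __U
   into the low-order lanes of the result, filling the rest from __W
   (mask) or with zeros (maskz); compressstoreu writes the selected
   elements contiguously to unaligned memory at __P.  Illustrative
   sketch with hypothetical variables, not part of this header:

     double buf[4];
     _mm256_mask_compressstoreu_pd (buf, 0x5, v);

   buf[0] and buf[1] receive lanes 0 and 2 of v.  */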
5744 extern __inline __m256d
5745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5748 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5749 (__v4df) __W,
5750 (__mmask8) __U);
5753 extern __inline __m256d
5754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5757 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5758 (__v4df)
5759 _mm256_setzero_pd (),
5760 (__mmask8) __U);
5763 extern __inline void
5764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5765 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5767 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5768 (__v4df) __A,
5769 (__mmask8) __U);
5772 extern __inline __m128d
5773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5776 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5777 (__v2df) __W,
5778 (__mmask8) __U);
5781 extern __inline __m128d
5782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5783 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5785 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5786 (__v2df)
5787 _mm_setzero_pd (),
5788 (__mmask8) __U);
5791 extern __inline void
5792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5793 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5795 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5796 (__v2df) __A,
5797 (__mmask8) __U);
5800 extern __inline __m256
5801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5802 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5804 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5805 (__v8sf) __W,
5806 (__mmask8) __U);
5809 extern __inline __m256
5810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5813 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5814 (__v8sf)
5815 _mm256_setzero_ps (),
5816 (__mmask8) __U);
5819 extern __inline void
5820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5823 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5824 (__v8sf) __A,
5825 (__mmask8) __U);
5828 extern __inline __m128
5829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5832 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5833 (__v4sf) __W,
5834 (__mmask8) __U);
5837 extern __inline __m128
5838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5839 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5841 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5842 (__v4sf)
5843 _mm_setzero_ps (),
5844 (__mmask8) __U);
5847 extern __inline void
5848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5849 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5851 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5852 (__v4sf) __A,
5853 (__mmask8) __U);
5856 extern __inline __m256i
5857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5858 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5860 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5861 (__v4di) __W,
5862 (__mmask8) __U);
5865 extern __inline __m256i
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5869 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5870 (__v4di)
5871 _mm256_setzero_si256 (),
5872 (__mmask8) __U);
5875 extern __inline void
5876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5879 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5880 (__v4di) __A,
5881 (__mmask8) __U);
5884 extern __inline __m128i
5885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5886 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5888 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5889 (__v2di) __W,
5890 (__mmask8) __U);
5893 extern __inline __m128i
5894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5895 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5897 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5898 (__v2di)
5899 _mm_setzero_di (),
5900 (__mmask8) __U);
5903 extern __inline void
5904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5907 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5908 (__v2di) __A,
5909 (__mmask8) __U);
5912 extern __inline __m256i
5913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5914 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5916 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5917 (__v8si) __W,
5918 (__mmask8) __U);
5921 extern __inline __m256i
5922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5923 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5925 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5926 (__v8si)
5927 _mm256_setzero_si256 (),
5928 (__mmask8) __U);
5931 extern __inline void
5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5935 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5936 (__v8si) __A,
5937 (__mmask8) __U);
5940 extern __inline __m128i
5941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5942 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5944 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5945 (__v4si) __W,
5946 (__mmask8) __U);
5949 extern __inline __m128i
5950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5951 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5953 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5954 (__v4si)
5955 _mm_setzero_si128 (),
5956 (__mmask8) __U);
5959 extern __inline void
5960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5961 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5963 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5964 (__v4si) __A,
5965 (__mmask8) __U);
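/* Editorial note, not part of the original header: the compress forms pack
   the lanes selected by the mask into the low positions of the result, and
   the compressstoreu forms write them to consecutive memory locations,
   which is the usual building block for stream compaction.  A minimal
   sketch, kept under #if 0; the "example_" name is illustrative only.  */
#if 0
static inline void
example_store_selected_doubles (double *__dst, __mmask8 __keep, __m256d __v)
{
  /* Writes the selected lanes of __v contiguously at __dst; the number of
     doubles written equals the number of bits set in __keep.  */
  _mm256_mask_compressstoreu_pd (__dst, __keep, __v);
}
#endif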
5968 extern __inline __m256d
5969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5972 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5973 (__v4df) __W,
5974 (__mmask8) __U);
5977 extern __inline __m256d
5978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5979 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5981 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5982 (__v4df)
5983 _mm256_setzero_pd (),
5984 (__mmask8) __U);
5987 extern __inline __m256d
5988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5989 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5991 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
5992 (__v4df) __W,
5993 (__mmask8)
5994 __U);
5997 extern __inline __m256d
5998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6001 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6002 (__v4df)
6003 _mm256_setzero_pd (),
6004 (__mmask8)
6005 __U);
6008 extern __inline __m128d
6009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6012 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6013 (__v2df) __W,
6014 (__mmask8) __U);
6017 extern __inline __m128d
6018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6019 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6021 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6022 (__v2df)
6023 _mm_setzero_pd (),
6024 (__mmask8) __U);
6027 extern __inline __m128d
6028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6029 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6031 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6032 (__v2df) __W,
6033 (__mmask8)
6034 __U);
6037 extern __inline __m128d
6038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6041 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6042 (__v2df)
6043 _mm_setzero_pd (),
6044 (__mmask8)
6045 __U);
6048 extern __inline __m256
6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6052 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6053 (__v8sf) __W,
6054 (__mmask8) __U);
6057 extern __inline __m256
6058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6061 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6062 (__v8sf)
6063 _mm256_setzero_ps (),
6064 (__mmask8) __U);
6067 extern __inline __m256
6068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6071 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6072 (__v8sf) __W,
6073 (__mmask8) __U);
6076 extern __inline __m256
6077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6078 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6080 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6081 (__v8sf)
6082 _mm256_setzero_ps (),
6083 (__mmask8)
6084 __U);
6087 extern __inline __m128
6088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6091 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6092 (__v4sf) __W,
6093 (__mmask8) __U);
6096 extern __inline __m128
6097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6098 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6100 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6101 (__v4sf)
6102 _mm_setzero_ps (),
6103 (__mmask8) __U);
6106 extern __inline __m128
6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6110 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6111 (__v4sf) __W,
6112 (__mmask8) __U);
6115 extern __inline __m128
6116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6117 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6119 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6120 (__v4sf)
6121 _mm_setzero_ps (),
6122 (__mmask8)
6123 __U);
6126 extern __inline __m256i
6127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6128 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6130 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6131 (__v4di) __W,
6132 (__mmask8) __U);
6135 extern __inline __m256i
6136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6137 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6139 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6140 (__v4di)
6141 _mm256_setzero_si256 (),
6142 (__mmask8) __U);
6145 extern __inline __m256i
6146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6147 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6148 void const *__P)
6150 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6151 (__v4di) __W,
6152 (__mmask8)
6153 __U);
6156 extern __inline __m256i
6157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6160 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6161 (__v4di)
6162 _mm256_setzero_si256 (),
6163 (__mmask8)
6164 __U);
6167 extern __inline __m128i
6168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6169 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6171 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6172 (__v2di) __W,
6173 (__mmask8) __U);
6176 extern __inline __m128i
6177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6178 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6180 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6181 (__v2di)
6182 _mm_setzero_si128 (),
6183 (__mmask8) __U);
6186 extern __inline __m128i
6187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6190 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6191 (__v2di) __W,
6192 (__mmask8)
6193 __U);
6196 extern __inline __m128i
6197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6200 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6201 (__v2di)
6202 _mm_setzero_si128 (),
6203 (__mmask8)
6204 __U);
6207 extern __inline __m256i
6208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6209 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6211 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6212 (__v8si) __W,
6213 (__mmask8) __U);
6216 extern __inline __m256i
6217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6218 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6220 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6221 (__v8si)
6222 _mm256_setzero_si256 (),
6223 (__mmask8) __U);
6226 extern __inline __m256i
6227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6228 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6229 void const *__P)
6231 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6232 (__v8si) __W,
6233 (__mmask8)
6234 __U);
6237 extern __inline __m256i
6238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6239 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6241 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6242 (__v8si)
6243 _mm256_setzero_si256 (),
6244 (__mmask8)
6245 __U);
6248 extern __inline __m128i
6249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6252 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6253 (__v4si) __W,
6254 (__mmask8) __U);
6257 extern __inline __m128i
6258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6261 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6262 (__v4si)
6263 _mm_setzero_si128 (),
6264 (__mmask8) __U);
6267 extern __inline __m128i
6268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6269 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6271 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6272 (__v4si) __W,
6273 (__mmask8)
6274 __U);
6277 extern __inline __m128i
6278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6281 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6282 (__v4si)
6283 _mm_setzero_si128 (),
6284 (__mmask8)
6285 __U);
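/* Editorial note, not part of the original header: expand is the inverse of
   compress; it takes consecutive source elements and scatters them, in
   order, into the destination lanes selected by the mask.  A minimal
   sketch, kept under #if 0; the "example_" name is illustrative only.  */
#if 0
static inline __m256d
example_load_into_selected_lanes (__mmask8 __lanes, const double *__src)
{
  /* Reads one double from __src for each bit set in __lanes and places it
     in the corresponding lane; unselected lanes are zeroed.  */
  return _mm256_maskz_expandloadu_pd (__lanes, __src);
}
#endif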
6288 extern __inline __m256d
6289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6292 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6293 /* idx */ ,
6294 (__v4df) __A,
6295 (__v4df) __B,
6296 (__mmask8) -1);
6300 extern __inline __m256d
6301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6303 __m256d __B)
6305 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6306 /* idx */ ,
6307 (__v4df) __A,
6308 (__v4df) __B,
6309 (__mmask8)
6310 __U);
6313 extern __inline __m256d
6314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6316 __m256d __B)
6318 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6319 (__v4di) __I
6320 /* idx */ ,
6321 (__v4df) __B,
6322 (__mmask8)
6323 __U);
6326 extern __inline __m256d
6327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6329 __m256d __B)
6331 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6332 /* idx */ ,
6333 (__v4df) __A,
6334 (__v4df) __B,
6335 (__mmask8)
6336 __U);
6339 extern __inline __m256
6340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6343 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6344 /* idx */ ,
6345 (__v8sf) __A,
6346 (__v8sf) __B,
6347 (__mmask8) -1);
6350 extern __inline __m256
6351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6352 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6353 __m256 __B)
6355 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6356 /* idx */ ,
6357 (__v8sf) __A,
6358 (__v8sf) __B,
6359 (__mmask8) __U);
6362 extern __inline __m256
6363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6365 __m256 __B)
6367 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6368 (__v8si) __I
6369 /* idx */ ,
6370 (__v8sf) __B,
6371 (__mmask8) __U);
6374 extern __inline __m256
6375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6377 __m256 __B)
6379 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6380 /* idx */ ,
6381 (__v8sf) __A,
6382 (__v8sf) __B,
6383 (__mmask8)
6384 __U);
6387 extern __inline __m128i
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6391 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6392 /* idx */ ,
6393 (__v2di) __A,
6394 (__v2di) __B,
6395 (__mmask8) -1);
6398 extern __inline __m128i
6399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6400 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6401 __m128i __B)
6403 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6404 /* idx */ ,
6405 (__v2di) __A,
6406 (__v2di) __B,
6407 (__mmask8) __U);
6410 extern __inline __m128i
6411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6413 __m128i __B)
6415 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6416 (__v2di) __I
6417 /* idx */ ,
6418 (__v2di) __B,
6419 (__mmask8) __U);
6422 extern __inline __m128i
6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6425 __m128i __B)
6427 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6428 /* idx */ ,
6429 (__v2di) __A,
6430 (__v2di) __B,
6431 (__mmask8)
6432 __U);
6435 extern __inline __m128i
6436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6437 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6439 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6440 /* idx */ ,
6441 (__v4si) __A,
6442 (__v4si) __B,
6443 (__mmask8) -1);
6446 extern __inline __m128i
6447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6448 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6449 __m128i __B)
6451 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6452 /* idx */ ,
6453 (__v4si) __A,
6454 (__v4si) __B,
6455 (__mmask8) __U);
6458 extern __inline __m128i
6459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6460 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6461 __m128i __B)
6463 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6464 (__v4si) __I
6465 /* idx */ ,
6466 (__v4si) __B,
6467 (__mmask8) __U);
6470 extern __inline __m128i
6471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6472 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6473 __m128i __B)
6475 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6476 /* idx */ ,
6477 (__v4si) __A,
6478 (__v4si) __B,
6479 (__mmask8)
6480 __U);
6483 extern __inline __m256i
6484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6485 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6487 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6488 /* idx */ ,
6489 (__v4di) __A,
6490 (__v4di) __B,
6491 (__mmask8) -1);
6494 extern __inline __m256i
6495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6496 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6497 __m256i __B)
6499 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6500 /* idx */ ,
6501 (__v4di) __A,
6502 (__v4di) __B,
6503 (__mmask8) __U);
6506 extern __inline __m256i
6507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6508 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6509 __mmask8 __U, __m256i __B)
6511 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6512 (__v4di) __I
6513 /* idx */ ,
6514 (__v4di) __B,
6515 (__mmask8) __U);
6518 extern __inline __m256i
6519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6520 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6521 __m256i __I, __m256i __B)
6523 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6524 /* idx */ ,
6525 (__v4di) __A,
6526 (__v4di) __B,
6527 (__mmask8)
6528 __U);
6531 extern __inline __m256i
6532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6533 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6535 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6536 /* idx */ ,
6537 (__v8si) __A,
6538 (__v8si) __B,
6539 (__mmask8) -1);
6542 extern __inline __m256i
6543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6544 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6545 __m256i __B)
6547 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6548 /* idx */ ,
6549 (__v8si) __A,
6550 (__v8si) __B,
6551 (__mmask8) __U);
6554 extern __inline __m256i
6555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6557 __mmask8 __U, __m256i __B)
6559 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6560 (__v8si) __I
6561 /* idx */ ,
6562 (__v8si) __B,
6563 (__mmask8) __U);
6566 extern __inline __m256i
6567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6568 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6569 __m256i __I, __m256i __B)
6571 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6572 /* idx */ ,
6573 (__v8si) __A,
6574 (__v8si) __B,
6575 (__mmask8)
6576 __U);
6579 extern __inline __m128d
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6583 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6584 /* idx */ ,
6585 (__v2df) __A,
6586 (__v2df) __B,
6587 (__mmask8) -1);
6591 extern __inline __m128d
6592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6594 __m128d __B)
6596 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6597 /* idx */ ,
6598 (__v2df) __A,
6599 (__v2df) __B,
6600 (__mmask8)
6601 __U);
6604 extern __inline __m128d
6605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6607 __m128d __B)
6609 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6610 (__v2di) __I
6611 /* idx */ ,
6612 (__v2df) __B,
6613 (__mmask8)
6614 __U);
6617 extern __inline __m128d
6618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6619 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6620 __m128d __B)
6622 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6623 /* idx */ ,
6624 (__v2df) __A,
6625 (__v2df) __B,
6626 (__mmask8)
6627 __U);
6630 extern __inline __m128
6631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6632 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6634 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6635 /* idx */ ,
6636 (__v4sf) __A,
6637 (__v4sf) __B,
6638 (__mmask8) -1);
6641 extern __inline __m128
6642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6644 __m128 __B)
6646 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6647 /* idx */ ,
6648 (__v4sf) __A,
6649 (__v4sf) __B,
6650 (__mmask8) __U);
6653 extern __inline __m128
6654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6655 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6656 __m128 __B)
6658 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6659 (__v4si) __I
6660 /* idx */ ,
6661 (__v4sf) __B,
6662 (__mmask8) __U);
6665 extern __inline __m128
6666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6668 __m128 __B)
6670 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6671 /* idx */ ,
6672 (__v4sf) __A,
6673 (__v4sf) __B,
6674 (__mmask8)
6675 __U);
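/* Editorial note, not part of the original header: permutex2var selects
   each result element from the concatenation of the two data operands,
   using the per-lane indices in __I; for the eight-lane ps form, index
   bit 3 chooses the second source.  A minimal sketch, kept under #if 0;
   _mm256_setr_epi32 comes from <immintrin.h> and the "example_" name is
   illustrative only.  */
#if 0
static inline __m256
example_interleave_low_lanes (__m256 __a, __m256 __b)
{
  /* Indices 0..7 select from __a, 8..15 from __b, so the result is
     { a0, b0, a1, b1, a2, b2, a3, b3 }.  */
  const __m256i __idx = _mm256_setr_epi32 (0, 8, 1, 9, 2, 10, 3, 11);
  return _mm256_permutex2var_ps (__a, __idx, __b);
}
#endif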
6678 extern __inline __m128i
6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6682 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6683 (__v2di) __Y,
6684 (__v2di)
6685 _mm_setzero_di (),
6686 (__mmask8) -1);
6689 extern __inline __m128i
6690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6691 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6692 __m128i __Y)
6694 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6695 (__v2di) __Y,
6696 (__v2di) __W,
6697 (__mmask8) __U);
6700 extern __inline __m128i
6701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6702 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6704 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6705 (__v2di) __Y,
6706 (__v2di)
6707 _mm_setzero_di (),
6708 (__mmask8) __U);
6711 extern __inline __m256i
6712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6714 __m256i __Y)
6716 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6717 (__v8si) __Y,
6718 (__v8si) __W,
6719 (__mmask8) __U);
6722 extern __inline __m256i
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6726 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6727 (__v8si) __Y,
6728 (__v8si)
6729 _mm256_setzero_si256 (),
6730 (__mmask8) __U);
6733 extern __inline __m128i
6734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6736 __m128i __Y)
6738 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6739 (__v4si) __Y,
6740 (__v4si) __W,
6741 (__mmask8) __U);
6744 extern __inline __m128i
6745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6748 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6749 (__v4si) __Y,
6750 (__v4si)
6751 _mm_setzero_si128 (),
6752 (__mmask8) __U);
6755 extern __inline __m256i
6756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6758 __m256i __Y)
6760 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6761 (__v4di) __Y,
6762 (__v4di) __W,
6763 (__mmask8) __U);
6766 extern __inline __m256i
6767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6770 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6771 (__v4di) __Y,
6772 (__v4di)
6773 _mm256_setzero_si256 (),
6774 (__mmask8) __U);
6777 extern __inline __m128i
6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6780 __m128i __Y)
6782 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6783 (__v2di) __Y,
6784 (__v2di) __W,
6785 (__mmask8) __U);
6788 extern __inline __m128i
6789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6792 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6793 (__v2di) __Y,
6794 (__v2di)
6795 _mm_setzero_di (),
6796 (__mmask8) __U);
6799 extern __inline __m256i
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6802 __m256i __Y)
6804 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6805 (__v8si) __Y,
6806 (__v8si) __W,
6807 (__mmask8) __U);
6810 extern __inline __m256i
6811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6814 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6815 (__v8si) __Y,
6816 (__v8si)
6817 _mm256_setzero_si256 (),
6818 (__mmask8) __U);
6821 extern __inline __m128i
6822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6824 __m128i __Y)
6826 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6827 (__v4si) __Y,
6828 (__v4si) __W,
6829 (__mmask8) __U);
6832 extern __inline __m128i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6836 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6837 (__v4si) __Y,
6838 (__v4si)
6839 _mm_setzero_si128 (),
6840 (__mmask8) __U);
6843 extern __inline __m256i
6844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6846 __m256i __Y)
6848 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6849 (__v8si) __Y,
6850 (__v8si) __W,
6851 (__mmask8) __U);
6854 extern __inline __m256i
6855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6858 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6859 (__v8si) __Y,
6860 (__v8si)
6861 _mm256_setzero_si256 (),
6862 (__mmask8) __U);
6865 extern __inline __m128i
6866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6868 __m128i __Y)
6870 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6871 (__v4si) __Y,
6872 (__v4si) __W,
6873 (__mmask8) __U);
6876 extern __inline __m128i
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6880 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6881 (__v4si) __Y,
6882 (__v4si)
6883 _mm_setzero_si128 (),
6884 (__mmask8) __U);
6887 extern __inline __m256i
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6890 __m256i __Y)
6892 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6893 (__v4di) __Y,
6894 (__v4di) __W,
6895 (__mmask8) __U);
6898 extern __inline __m256i
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6902 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6903 (__v4di) __Y,
6904 (__v4di)
6905 _mm256_setzero_si256 (),
6906 (__mmask8) __U);
6909 extern __inline __m128i
6910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6912 __m128i __Y)
6914 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6915 (__v2di) __Y,
6916 (__v2di) __W,
6917 (__mmask8) __U);
6920 extern __inline __m128i
6921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6924 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6925 (__v2di) __Y,
6926 (__v2di)
6927 _mm_setzero_di (),
6928 (__mmask8) __U);
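/* Editorial note, not part of the original header: the sllv/srlv/srav
   intrinsics shift each lane by the count held in the corresponding lane
   of the second operand, and the _mask/_maskz variants additionally merge
   with __W or zero the unselected lanes.  A minimal sketch, kept under
   #if 0; the "example_" name is illustrative only.  */
#if 0
static inline __m256i
example_masked_variable_shift (__mmask8 __k, __m256i __x, __m256i __counts)
{
  /* Lane i becomes __x[i] << __counts[i] where bit i of __k is set,
     and 0 elsewhere.  */
  return _mm256_maskz_sllv_epi32 (__k, __x, __counts);
}
#endif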
6931 extern __inline __m256i
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
6935 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6936 (__v8si) __B,
6937 (__v8si)
6938 _mm256_setzero_si256 (),
6939 (__mmask8) -1);
6942 extern __inline __m256i
6943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6945 __m256i __B)
6947 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6948 (__v8si) __B,
6949 (__v8si) __W,
6950 (__mmask8) __U);
6953 extern __inline __m256i
6954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6957 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6958 (__v8si) __B,
6959 (__v8si)
6960 _mm256_setzero_si256 (),
6961 (__mmask8) __U);
6964 extern __inline __m128i
6965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966 _mm_rolv_epi32 (__m128i __A, __m128i __B)
6968 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6969 (__v4si) __B,
6970 (__v4si)
6971 _mm_setzero_si128 (),
6972 (__mmask8) -1);
6975 extern __inline __m128i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6978 __m128i __B)
6980 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6981 (__v4si) __B,
6982 (__v4si) __W,
6983 (__mmask8) __U);
6986 extern __inline __m128i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6990 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6991 (__v4si) __B,
6992 (__v4si)
6993 _mm_setzero_si128 (),
6994 (__mmask8) __U);
6997 extern __inline __m256i
6998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
7001 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7002 (__v8si) __B,
7003 (__v8si)
7004 _mm256_setzero_si256 (),
7005 (__mmask8) -1);
7008 extern __inline __m256i
7009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7011 __m256i __B)
7013 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7014 (__v8si) __B,
7015 (__v8si) __W,
7016 (__mmask8) __U);
7019 extern __inline __m256i
7020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7023 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7024 (__v8si) __B,
7025 (__v8si)
7026 _mm256_setzero_si256 (),
7027 (__mmask8) __U);
7030 extern __inline __m128i
7031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032 _mm_rorv_epi32 (__m128i __A, __m128i __B)
7034 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7035 (__v4si) __B,
7036 (__v4si)
7037 _mm_setzero_si128 (),
7038 (__mmask8) -1);
7041 extern __inline __m128i
7042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7044 __m128i __B)
7046 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7047 (__v4si) __B,
7048 (__v4si) __W,
7049 (__mmask8) __U);
7052 extern __inline __m128i
7053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7056 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7057 (__v4si) __B,
7058 (__v4si)
7059 _mm_setzero_si128 (),
7060 (__mmask8) __U);
7063 extern __inline __m256i
7064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
7067 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7068 (__v4di) __B,
7069 (__v4di)
7070 _mm256_setzero_si256 (),
7071 (__mmask8) -1);
7074 extern __inline __m256i
7075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7077 __m256i __B)
7079 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7080 (__v4di) __B,
7081 (__v4di) __W,
7082 (__mmask8) __U);
7085 extern __inline __m256i
7086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7089 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7090 (__v4di) __B,
7091 (__v4di)
7092 _mm256_setzero_si256 (),
7093 (__mmask8) __U);
7096 extern __inline __m128i
7097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7100 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7101 (__v2di) __B,
7102 (__v2di)
7103 _mm_setzero_di (),
7104 (__mmask8) -1);
7107 extern __inline __m128i
7108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7110 __m128i __B)
7112 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7113 (__v2di) __B,
7114 (__v2di) __W,
7115 (__mmask8) __U);
7118 extern __inline __m128i
7119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7122 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7123 (__v2di) __B,
7124 (__v2di)
7125 _mm_setzero_di (),
7126 (__mmask8) __U);
7129 extern __inline __m256i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7133 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7134 (__v4di) __B,
7135 (__v4di)
7136 _mm256_setzero_si256 (),
7137 (__mmask8) -1);
7140 extern __inline __m256i
7141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7143 __m256i __B)
7145 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7146 (__v4di) __B,
7147 (__v4di) __W,
7148 (__mmask8) __U);
7151 extern __inline __m256i
7152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7155 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7156 (__v4di) __B,
7157 (__v4di)
7158 _mm256_setzero_si256 (),
7159 (__mmask8) __U);
7162 extern __inline __m128i
7163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7166 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7167 (__v2di) __B,
7168 (__v2di)
7169 _mm_setzero_di (),
7170 (__mmask8) -1);
7173 extern __inline __m128i
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7176 __m128i __B)
7178 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7179 (__v2di) __B,
7180 (__v2di) __W,
7181 (__mmask8) __U);
7184 extern __inline __m128i
7185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7188 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7189 (__v2di) __B,
7190 (__v2di)
7191 _mm_setzero_di (),
7192 (__mmask8) __U);
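/* Editorial note, not part of the original header: rolv/rorv rotate each
   lane left or right by the per-lane count in the second operand; unlike
   shifts, no bits are discarded.  A minimal sketch, kept under #if 0;
   _mm_setr_epi32 comes from <immintrin.h> and the "example_" name is
   illustrative only.  */
#if 0
static inline __m128i
example_rotate_each_lane (__m128i __v)
{
  /* Rotate the four 32-bit lanes of __v left by 1, 2, 3 and 4 bits.  */
  return _mm_rolv_epi32 (__v, _mm_setr_epi32 (1, 2, 3, 4));
}
#endif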
7195 extern __inline __m256i
7196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7199 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7200 (__v4di) __Y,
7201 (__v4di)
7202 _mm256_setzero_si256 (),
7203 (__mmask8) -1);
7206 extern __inline __m256i
7207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7209 __m256i __Y)
7211 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7212 (__v4di) __Y,
7213 (__v4di) __W,
7214 (__mmask8) __U);
7217 extern __inline __m256i
7218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7221 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7222 (__v4di) __Y,
7223 (__v4di)
7224 _mm256_setzero_si256 (),
7225 (__mmask8) __U);
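/* Editorial note, not part of the original header: a variable arithmetic
   right shift of 64-bit lanes (vpsravq) has no AVX2 counterpart; it is
   provided by AVX-512F/VL.  A minimal sketch, kept under #if 0; the
   "example_" name is illustrative only.  */
#if 0
static inline __m256i
example_arithmetic_shift_epi64 (__m256i __x, __m256i __counts)
{
  /* Sign-extending right shift of each 64-bit lane by its own count.  */
  return _mm256_srav_epi64 (__x, __counts);
}
#endif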
7228 extern __inline __m256i
7229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7231 __m256i __B)
7233 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7234 (__v4di) __B,
7235 (__v4di) __W, __U);
7238 extern __inline __m256i
7239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7240 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7242 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7243 (__v4di) __B,
7244 (__v4di)
7245 _mm256_setzero_si256 (),
7246 __U);
7249 extern __inline __m128i
7250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7252 __m128i __B)
7254 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7255 (__v2di) __B,
7256 (__v2di) __W, __U);
7259 extern __inline __m128i
7260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7263 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7264 (__v2di) __B,
7265 (__v2di)
7266 _mm_setzero_si128 (),
7267 __U);
7270 extern __inline __m256i
7271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7272 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7273 __m256i __B)
7275 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7276 (__v4di) __B,
7277 (__v4di) __W, __U);
7280 extern __inline __m256i
7281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7284 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7285 (__v4di) __B,
7286 (__v4di)
7287 _mm256_setzero_si256 (),
7288 __U);
7291 extern __inline __m128i
7292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7294 __m128i __B)
7296 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7297 (__v2di) __B,
7298 (__v2di) __W, __U);
7301 extern __inline __m128i
7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7305 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7306 (__v2di) __B,
7307 (__v2di)
7308 _mm_setzero_si128 (),
7309 __U);
7312 extern __inline __m256i
7313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7314 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7315 __m256i __B)
7317 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7318 (__v4di) __B,
7319 (__v4di) __W,
7320 (__mmask8) __U);
7323 extern __inline __m256i
7324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7325 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7327 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7328 (__v4di) __B,
7329 (__v4di)
7330 _mm256_setzero_si256 (),
7331 (__mmask8) __U);
7334 extern __inline __m128i
7335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7338 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7339 (__v2di) __B,
7340 (__v2di) __W,
7341 (__mmask8) __U);
7344 extern __inline __m128i
7345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7346 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7348 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7349 (__v2di) __B,
7350 (__v2di)
7351 _mm_setzero_si128 (),
7352 (__mmask8) __U);
7355 extern __inline __m256i
7356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7357 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7358 __m256i __B)
7360 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7361 (__v4di) __B,
7362 (__v4di) __W,
7363 (__mmask8) __U);
7366 extern __inline __m256i
7367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7368 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7370 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7371 (__v4di) __B,
7372 (__v4di)
7373 _mm256_setzero_si256 (),
7374 (__mmask8) __U);
7377 extern __inline __m128i
7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7380 __m128i __B)
7382 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7383 (__v2di) __B,
7384 (__v2di) __W,
7385 (__mmask8) __U);
7388 extern __inline __m128i
7389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7392 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7393 (__v2di) __B,
7394 (__v2di)
7395 _mm_setzero_si128 (),
7396 (__mmask8) __U);
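/* Editorial note, not part of the original header: the and/andnot/or/xor
   _epi64 variants are the write-masked forms of the ordinary bitwise
   operations; unselected lanes either keep __W (_mask) or become zero
   (_maskz).  A minimal sketch, kept under #if 0; the "example_" name is
   illustrative only.  */
#if 0
static inline __m256i
example_masked_and (__m256i __old, __mmask8 __k, __m256i __a, __m256i __b)
{
  /* Lanes selected by __k receive __a & __b; the others keep __old.  */
  return _mm256_mask_and_epi64 (__old, __k, __a, __b);
}
#endif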
7399 extern __inline __m256d
7400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7402 __m256d __B)
7404 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7405 (__v4df) __B,
7406 (__v4df) __W,
7407 (__mmask8) __U);
7410 extern __inline __m256d
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7414 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7415 (__v4df) __B,
7416 (__v4df)
7417 _mm256_setzero_pd (),
7418 (__mmask8) __U);
7421 extern __inline __m256
7422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7423 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7425 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7426 (__v8sf) __B,
7427 (__v8sf) __W,
7428 (__mmask8) __U);
7431 extern __inline __m256
7432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7435 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7436 (__v8sf) __B,
7437 (__v8sf)
7438 _mm256_setzero_ps (),
7439 (__mmask8) __U);
7442 extern __inline __m128
7443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7444 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7446 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7447 (__v4sf) __B,
7448 (__v4sf) __W,
7449 (__mmask8) __U);
7452 extern __inline __m128
7453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7454 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7456 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7457 (__v4sf) __B,
7458 (__v4sf)
7459 _mm_setzero_ps (),
7460 (__mmask8) __U);
7463 extern __inline __m128d
7464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7465 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7467 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7468 (__v2df) __B,
7469 (__v2df) __W,
7470 (__mmask8) __U);
7473 extern __inline __m128d
7474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7477 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7478 (__v2df) __B,
7479 (__v2df)
7480 _mm_setzero_pd (),
7481 (__mmask8) __U);
7484 extern __inline __m256d
7485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7486 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7487 __m256d __B)
7489 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7490 (__v4df) __B,
7491 (__v4df) __W,
7492 (__mmask8) __U);
7495 extern __inline __m256d
7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7498 __m256d __B)
7500 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7501 (__v4df) __B,
7502 (__v4df) __W,
7503 (__mmask8) __U);
7506 extern __inline __m256d
7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7510 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7511 (__v4df) __B,
7512 (__v4df)
7513 _mm256_setzero_pd (),
7514 (__mmask8) __U);
7517 extern __inline __m256
7518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7519 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7521 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7522 (__v8sf) __B,
7523 (__v8sf) __W,
7524 (__mmask8) __U);
7527 extern __inline __m256d
7528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7529 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7531 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7532 (__v4df) __B,
7533 (__v4df)
7534 _mm256_setzero_pd (),
7535 (__mmask8) __U);
7538 extern __inline __m256
7539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7540 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7542 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7543 (__v8sf) __B,
7544 (__v8sf) __W,
7545 (__mmask8) __U);
7548 extern __inline __m256
7549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7552 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7553 (__v8sf) __B,
7554 (__v8sf)
7555 _mm256_setzero_ps (),
7556 (__mmask8) __U);
7559 extern __inline __m256
7560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7561 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7563 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7564 (__v8sf) __B,
7565 (__v8sf)
7566 _mm256_setzero_ps (),
7567 (__mmask8) __U);
7570 extern __inline __m128
7571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7572 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7574 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7575 (__v4sf) __B,
7576 (__v4sf) __W,
7577 (__mmask8) __U);
7580 extern __inline __m128
7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7584 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7585 (__v4sf) __B,
7586 (__v4sf) __W,
7587 (__mmask8) __U);
7590 extern __inline __m128
7591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7592 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7594 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7595 (__v4sf) __B,
7596 (__v4sf)
7597 _mm_setzero_ps (),
7598 (__mmask8) __U);
7601 extern __inline __m128
7602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7603 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7605 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7606 (__v4sf) __B,
7607 (__v4sf)
7608 _mm_setzero_ps (),
7609 (__mmask8) __U);
7612 extern __inline __m128
7613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7616 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7617 (__v4sf) __B,
7618 (__v4sf) __W,
7619 (__mmask8) __U);
7622 extern __inline __m128
7623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7626 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7627 (__v4sf) __B,
7628 (__v4sf)
7629 _mm_setzero_ps (),
7630 (__mmask8) __U);
7633 extern __inline __m128d
7634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7635 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7637 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7638 (__v2df) __B,
7639 (__v2df) __W,
7640 (__mmask8) __U);
7643 extern __inline __m128d
7644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7645 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7647 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7648 (__v2df) __B,
7649 (__v2df)
7650 _mm_setzero_pd (),
7651 (__mmask8) __U);
7654 extern __inline __m128d
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7658 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7659 (__v2df) __B,
7660 (__v2df) __W,
7661 (__mmask8) __U);
7664 extern __inline __m128d
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7668 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7669 (__v2df) __B,
7670 (__v2df)
7671 _mm_setzero_pd (),
7672 (__mmask8) __U);
7675 extern __inline __m128d
7676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7677 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7679 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7680 (__v2df) __B,
7681 (__v2df) __W,
7682 (__mmask8) __U);
7685 extern __inline __m128d
7686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7687 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7689 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7690 (__v2df) __B,
7691 (__v2df)
7692 _mm_setzero_pd (),
7693 (__mmask8) __U);
7696 extern __inline __m256
7697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7700 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7701 (__v8sf) __B,
7702 (__v8sf) __W,
7703 (__mmask8) __U);
7706 extern __inline __m256
7707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7710 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7711 (__v8sf) __B,
7712 (__v8sf)
7713 _mm256_setzero_ps (),
7714 (__mmask8) __U);
7717 extern __inline __m256d
7718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7720 __m256d __B)
7722 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7723 (__v4df) __B,
7724 (__v4df) __W,
7725 (__mmask8) __U);
7728 extern __inline __m256d
7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7732 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7733 (__v4df) __B,
7734 (__v4df)
7735 _mm256_setzero_pd (),
7736 (__mmask8) __U);
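/* Usage sketch (illustrative, not part of the upstream header): the
   _mask_ wrappers above merge lanes whose mask bit is clear from __W,
   while the _maskz_ wrappers zero those lanes.

     __m256d a = _mm256_set1_pd (2.0), b = _mm256_set1_pd (3.0);
     __m256d w = _mm256_set1_pd (-1.0);
     __m256d r0 = _mm256_mask_mul_pd (w, 0x5, a, b);
     __m256d r1 = _mm256_maskz_mul_pd (0x5, a, b);

   r0 is { 6.0, -1.0, 6.0, -1.0 } and r1 is { 6.0, 0.0, 6.0, 0.0 }.  */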
7739 extern __inline __m256i
7740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7743 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7744 (__v4di) __B,
7745 (__v4di)
7746 _mm256_setzero_si256 (),
7747 __M);
7750 extern __inline __m256i
7751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7752 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7753 __m256i __B)
7755 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7756 (__v4di) __B,
7757 (__v4di) __W, __M);
7760 extern __inline __m256i
7761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7762 _mm256_min_epi64 (__m256i __A, __m256i __B)
7764 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7765 (__v4di) __B,
7766 (__v4di)
7767 _mm256_setzero_si256 (),
7768 (__mmask8) -1);
7771 extern __inline __m256i
7772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7773 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7774 __m256i __B)
7776 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7777 (__v4di) __B,
7778 (__v4di) __W, __M);
7781 extern __inline __m256i
7782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7783 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7785 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7786 (__v4di) __B,
7787 (__v4di)
7788 _mm256_setzero_si256 (),
7789 __M);
7792 extern __inline __m256i
7793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7794 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7796 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7797 (__v4di) __B,
7798 (__v4di)
7799 _mm256_setzero_si256 (),
7800 __M);
7803 extern __inline __m256i
7804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7805 _mm256_max_epi64 (__m256i __A, __m256i __B)
7807 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7808 (__v4di) __B,
7809 (__v4di)
7810 _mm256_setzero_si256 (),
7811 (__mmask8) -1);
7814 extern __inline __m256i
7815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7816 _mm256_max_epu64 (__m256i __A, __m256i __B)
7818 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7819 (__v4di) __B,
7820 (__v4di)
7821 _mm256_setzero_si256 (),
7822 (__mmask8) -1);
7825 extern __inline __m256i
7826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7828 __m256i __B)
7830 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7831 (__v4di) __B,
7832 (__v4di) __W, __M);
7835 extern __inline __m256i
7836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837 _mm256_min_epu64 (__m256i __A, __m256i __B)
7839 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7840 (__v4di) __B,
7841 (__v4di)
7842 _mm256_setzero_si256 (),
7843 (__mmask8) -1);
7846 extern __inline __m256i
7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7849 __m256i __B)
7851 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7852 (__v4di) __B,
7853 (__v4di) __W, __M);
7856 extern __inline __m256i
7857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7858 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7860 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7861 (__v4di) __B,
7862 (__v4di)
7863 _mm256_setzero_si256 (),
7864 __M);
7867 extern __inline __m256i
7868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7869 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7871 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7872 (__v8si) __B,
7873 (__v8si)
7874 _mm256_setzero_si256 (),
7875 __M);
7878 extern __inline __m256i
7879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7881 __m256i __B)
7883 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7884 (__v8si) __B,
7885 (__v8si) __W, __M);
7888 extern __inline __m256i
7889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7892 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7893 (__v8si) __B,
7894 (__v8si)
7895 _mm256_setzero_si256 (),
7896 __M);
7899 extern __inline __m256i
7900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7901 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7902 __m256i __B)
7904 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7905 (__v8si) __B,
7906 (__v8si) __W, __M);
7909 extern __inline __m256i
7910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7911 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7913 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7914 (__v8si) __B,
7915 (__v8si)
7916 _mm256_setzero_si256 (),
7917 __M);
7920 extern __inline __m256i
7921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7922 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7923 __m256i __B)
7925 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7926 (__v8si) __B,
7927 (__v8si) __W, __M);
7930 extern __inline __m256i
7931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7932 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7934 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7935 (__v8si) __B,
7936 (__v8si)
7937 _mm256_setzero_si256 (),
7938 __M);
7941 extern __inline __m256i
7942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7943 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7944 __m256i __B)
7946 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7947 (__v8si) __B,
7948 (__v8si) __W, __M);
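/* Usage sketch (illustrative, not part of the upstream header): unlike
   the 32-bit forms, 64-bit integer max/min have no AVX2 counterpart,
   so even the unmasked _mm256_max_epi64 above is new with AVX-512VL.

     __m256i x = _mm256_set1_epi64x (-7);
     __m256i y = _mm256_set1_epi64x (5);
     __m256i m = _mm256_max_epi64 (x, y);
     __m256i z = _mm256_maskz_min_epi64 (0x3, x, y);

   m holds 5 in every lane; z holds { -7, -7, 0, 0 }.  */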
7951 extern __inline __m128i
7952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7955 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7956 (__v2di) __B,
7957 (__v2di)
7958 _mm_setzero_si128 (),
7959 __M);
7962 extern __inline __m128i
7963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7965 __m128i __B)
7967 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7968 (__v2di) __B,
7969 (__v2di) __W, __M);
7972 extern __inline __m128i
7973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974 _mm_min_epi64 (__m128i __A, __m128i __B)
7976 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7977 (__v2di) __B,
7978 (__v2di)
7979 _mm_setzero_di (),
7980 (__mmask8) -1);
7983 extern __inline __m128i
7984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7986 __m128i __B)
7988 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7989 (__v2di) __B,
7990 (__v2di) __W, __M);
7993 extern __inline __m128i
7994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7995 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7997 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7998 (__v2di) __B,
7999 (__v2di)
8000 _mm_setzero_si128 (),
8001 __M);
8004 extern __inline __m128i
8005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8006 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8008 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8009 (__v2di) __B,
8010 (__v2di)
8011 _mm_setzero_si128 (),
8012 __M);
8015 extern __inline __m128i
8016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017 _mm_max_epi64 (__m128i __A, __m128i __B)
8019 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8020 (__v2di) __B,
8021 (__v2di)
8022 _mm_setzero_di (),
8023 (__mmask8) -1);
8026 extern __inline __m128i
8027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028 _mm_max_epu64 (__m128i __A, __m128i __B)
8030 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8031 (__v2di) __B,
8032 (__v2di)
8033 _mm_setzero_di (),
8034 (__mmask8) -1);
8037 extern __inline __m128i
8038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8039 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8040 __m128i __B)
8042 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8043 (__v2di) __B,
8044 (__v2di) __W, __M);
8047 extern __inline __m128i
8048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049 _mm_min_epu64 (__m128i __A, __m128i __B)
8051 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8052 (__v2di) __B,
8053 (__v2di)
8054 _mm_setzero_di (),
8055 (__mmask8) -1);
8058 extern __inline __m128i
8059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8060 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8061 __m128i __B)
8063 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8064 (__v2di) __B,
8065 (__v2di) __W, __M);
8068 extern __inline __m128i
8069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8072 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8073 (__v2di) __B,
8074 (__v2di)
8075 _mm_setzero_si128 (),
8076 __M);
8079 extern __inline __m128i
8080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8081 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8083 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8084 (__v4si) __B,
8085 (__v4si)
8086 _mm_setzero_si128 (),
8087 __M);
8090 extern __inline __m128i
8091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8093 __m128i __B)
8095 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8096 (__v4si) __B,
8097 (__v4si) __W, __M);
8100 extern __inline __m128i
8101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8104 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8105 (__v4si) __B,
8106 (__v4si)
8107 _mm_setzero_si128 (),
8108 __M);
8111 extern __inline __m128i
8112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8114 __m128i __B)
8116 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8117 (__v4si) __B,
8118 (__v4si) __W, __M);
8121 extern __inline __m128i
8122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8123 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8125 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8126 (__v4si) __B,
8127 (__v4si)
8128 _mm_setzero_si128 (),
8129 __M);
8132 extern __inline __m128i
8133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8135 __m128i __B)
8137 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8138 (__v4si) __B,
8139 (__v4si) __W, __M);
8142 extern __inline __m128i
8143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8144 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8146 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8147 (__v4si) __B,
8148 (__v4si)
8149 _mm_setzero_si128 (),
8150 __M);
8153 extern __inline __m128i
8154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8156 __m128i __B)
8158 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8159 (__v4si) __B,
8160 (__v4si) __W, __M);
8163 #ifndef __AVX512CD__
8164 #pragma GCC push_options
8165 #pragma GCC target("avx512vl,avx512cd")
8166 #define __DISABLE_AVX512VLCD__
8167 #endif
8169 extern __inline __m128i
8170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8171 _mm_broadcastmb_epi64 (__mmask8 __A)
8173 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8176 extern __inline __m256i
8177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8178 _mm256_broadcastmb_epi64 (__mmask8 __A)
8180 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8183 extern __inline __m128i
8184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8185 _mm_broadcastmw_epi32 (__mmask16 __A)
8187 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8190 extern __inline __m256i
8191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192 _mm256_broadcastmw_epi32 (__mmask16 __A)
8194 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
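/* Usage sketch (illustrative, not part of the upstream header): the
   broadcastm intrinsics replicate the mask register value itself,
   zero-extended, into every element; they do not expand the mask one
   bit per lane.

     __m128i v = _mm_broadcastmb_epi64 ((__mmask8) 0x2c);
     __m256i w = _mm256_broadcastmw_epi32 ((__mmask16) 0x1234);

   Both 64-bit lanes of v hold 0x2c; all eight 32-bit lanes of w hold
   0x1234.  */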
8197 extern __inline __m256i
8198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8199 _mm256_lzcnt_epi32 (__m256i __A)
8201 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8202 (__v8si)
8203 _mm256_setzero_si256 (),
8204 (__mmask8) -1);
8207 extern __inline __m256i
8208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8209 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8211 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8212 (__v8si) __W,
8213 (__mmask8) __U);
8216 extern __inline __m256i
8217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8218 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8220 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8221 (__v8si)
8222 _mm256_setzero_si256 (),
8223 (__mmask8) __U);
8226 extern __inline __m256i
8227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228 _mm256_lzcnt_epi64 (__m256i __A)
8230 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8231 (__v4di)
8232 _mm256_setzero_si256 (),
8233 (__mmask8) -1);
8236 extern __inline __m256i
8237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8238 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8240 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8241 (__v4di) __W,
8242 (__mmask8) __U);
8245 extern __inline __m256i
8246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8247 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8249 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8250 (__v4di)
8251 _mm256_setzero_si256 (),
8252 (__mmask8) __U);
8255 extern __inline __m256i
8256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8257 _mm256_conflict_epi64 (__m256i __A)
8259 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8260 (__v4di)
8261 _mm256_setzero_si256 (),
8262 (__mmask8) -1);
8266 extern __inline __m256i
8267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8268 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8270 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8271 (__v4di) __W,
8272 (__mmask8)
8273 __U);
8276 extern __inline __m256i
8277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8278 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8280 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8281 (__v4di)
8282 _mm256_setzero_si256 (),
8283 (__mmask8)
8284 __U);
8287 extern __inline __m256i
8288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8289 _mm256_conflict_epi32 (__m256i __A)
8291 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8292 (__v8si)
8293 _mm256_setzero_si256 (),
8294 (__mmask8) -1);
8298 extern __inline __m256i
8299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8300 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8302 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8303 (__v8si) __W,
8304 (__mmask8)
8305 __U);
8308 extern __inline __m256i
8309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8310 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8312 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8313 (__v8si)
8314 _mm256_setzero_si256 (),
8315 (__mmask8)
8316 __U);
8319 extern __inline __m128i
8320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8321 _mm_lzcnt_epi32 (__m128i __A)
8323 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8324 (__v4si)
8325 _mm_setzero_si128 (),
8326 (__mmask8) -1);
8329 extern __inline __m128i
8330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8331 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8333 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8334 (__v4si) __W,
8335 (__mmask8) __U);
8338 extern __inline __m128i
8339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8340 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8342 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8343 (__v4si)
8344 _mm_setzero_si128 (),
8345 (__mmask8) __U);
8348 extern __inline __m128i
8349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8350 _mm_lzcnt_epi64 (__m128i __A)
8352 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8353 (__v2di)
8354 _mm_setzero_di (),
8355 (__mmask8) -1);
8358 extern __inline __m128i
8359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8360 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8362 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8363 (__v2di) __W,
8364 (__mmask8) __U);
8367 extern __inline __m128i
8368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8371 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8372 (__v2di)
8373 _mm_setzero_di (),
8374 (__mmask8) __U);
8377 extern __inline __m128i
8378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8379 _mm_conflict_epi64 (__m128i __A)
8381 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8382 (__v2di)
8383 _mm_setzero_di (),
8384 (__mmask8) -1);
8388 extern __inline __m128i
8389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8392 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8393 (__v2di) __W,
8394 (__mmask8)
8395 __U);
8398 extern __inline __m128i
8399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8402 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8403 (__v2di)
8404 _mm_setzero_di (),
8405 (__mmask8)
8406 __U);
8409 extern __inline __m128i
8410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8411 _mm_conflict_epi32 (__m128i __A)
8413 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8414 (__v4si)
8415 _mm_setzero_si128 (),
8416 (__mmask8) -1);
8420 extern __inline __m128i
8421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8424 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8425 (__v4si) __W,
8426 (__mmask8)
8427 __U);
8430 extern __inline __m128i
8431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8432 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8434 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8435 (__v4si)
8436 _mm_setzero_si128 (),
8437 (__mmask8)
8438 __U);
8441 #ifdef __DISABLE_AVX512VLCD__
8442 #pragma GCC pop_options
8443 #endif
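/* Usage sketch (illustrative, not part of the upstream header): the
   AVX-512CD wrappers above count leading zeros per element and detect
   duplicates; each element of a conflict result has one bit set for
   every earlier element holding the same value, which is how repeated
   indices are found before a scatter-style update.

     __m256i v = _mm256_setr_epi32 (1, 16, 1, 8, 1, 8, 0, 1);
     __m256i lz = _mm256_lzcnt_epi32 (v);
     __m256i cf = _mm256_conflict_epi32 (v);

   lz starts 31, 27, 31, 28, ...; cf element 2 is 0b1 (matches element
   0) and element 4 is 0b101 (matches elements 0 and 2).  */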
8445 extern __inline __m256d
8446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8447 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8448 __m256d __B)
8450 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8451 (__v4df) __B,
8452 (__v4df) __W,
8453 (__mmask8) __U);
8456 extern __inline __m256d
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8460 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8461 (__v4df) __B,
8462 (__v4df)
8463 _mm256_setzero_pd (),
8464 (__mmask8) __U);
8467 extern __inline __m128d
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8470 __m128d __B)
8472 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8473 (__v2df) __B,
8474 (__v2df) __W,
8475 (__mmask8) __U);
8478 extern __inline __m128d
8479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8480 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8482 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8483 (__v2df) __B,
8484 (__v2df)
8485 _mm_setzero_pd (),
8486 (__mmask8) __U);
8489 extern __inline __m256
8490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8492 __m256 __B)
8494 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8495 (__v8sf) __B,
8496 (__v8sf) __W,
8497 (__mmask8) __U);
8500 extern __inline __m256d
8501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8503 __m256d __B)
8505 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8506 (__v4df) __B,
8507 (__v4df) __W,
8508 (__mmask8) __U);
8511 extern __inline __m256d
8512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8515 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8516 (__v4df) __B,
8517 (__v4df)
8518 _mm256_setzero_pd (),
8519 (__mmask8) __U);
8522 extern __inline __m128d
8523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8524 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8525 __m128d __B)
8527 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8528 (__v2df) __B,
8529 (__v2df) __W,
8530 (__mmask8) __U);
8533 extern __inline __m128d
8534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8535 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8537 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8538 (__v2df) __B,
8539 (__v2df)
8540 _mm_setzero_pd (),
8541 (__mmask8) __U);
8544 extern __inline __m256
8545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8546 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8547 __m256 __B)
8549 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8550 (__v8sf) __B,
8551 (__v8sf) __W,
8552 (__mmask8) __U);
8555 extern __inline __m256
8556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8557 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8559 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8560 (__v8sf) __B,
8561 (__v8sf)
8562 _mm256_setzero_ps (),
8563 (__mmask8) __U);
8566 extern __inline __m128
8567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8568 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8570 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8571 (__v4sf) __B,
8572 (__v4sf) __W,
8573 (__mmask8) __U);
8576 extern __inline __m128
8577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8578 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8580 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8581 (__v4sf) __B,
8582 (__v4sf)
8583 _mm_setzero_ps (),
8584 (__mmask8) __U);
8587 extern __inline __m128
8588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8589 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8591 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8592 (__v4sf) __W,
8593 (__mmask8) __U);
8596 extern __inline __m128
8597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8598 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8600 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8601 (__v4sf)
8602 _mm_setzero_ps (),
8603 (__mmask8) __U);
8606 extern __inline __m256
8607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8610 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8611 (__v8sf) __B,
8612 (__v8sf)
8613 _mm256_setzero_ps (),
8614 (__mmask8) __U);
8617 extern __inline __m256
8618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8619 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8621 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8622 (__v8sf) __W,
8623 (__mmask8) __U);
8626 extern __inline __m256
8627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8628 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8630 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8631 (__v8sf)
8632 _mm256_setzero_ps (),
8633 (__mmask8) __U);
8636 extern __inline __m128
8637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8640 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8641 (__v4sf) __B,
8642 (__v4sf) __W,
8643 (__mmask8) __U);
8646 extern __inline __m128
8647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8648 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8650 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8651 (__v4sf) __B,
8652 (__v4sf)
8653 _mm_setzero_ps (),
8654 (__mmask8) __U);
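/* Usage sketch (illustrative, not part of the upstream header): the
   masked VCVTPH2PS wrappers convert packed binary16 values held in the
   low half of an __m128i; lanes with a clear mask bit keep __W in the
   _mask_ form or become 0.0f in the _maskz_ form.

     __m128i h = _mm_set1_epi16 (0x3c00);
     __m128 f = _mm_maskz_cvtph_ps (0x7, h);

   0x3c00 is 1.0 in binary16, so f is { 1.0f, 1.0f, 1.0f, 0.0f }.  */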
8657 extern __inline __m256i
8658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8660 __m128i __B)
8662 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8663 (__v4si) __B,
8664 (__v8si) __W,
8665 (__mmask8) __U);
8668 extern __inline __m256i
8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8672 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8673 (__v4si) __B,
8674 (__v8si)
8675 _mm256_setzero_si256 (),
8676 (__mmask8) __U);
8679 extern __inline __m128i
8680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8682 __m128i __B)
8684 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8685 (__v4si) __B,
8686 (__v4si) __W,
8687 (__mmask8) __U);
8690 extern __inline __m128i
8691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8694 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8695 (__v4si) __B,
8696 (__v4si)
8697 _mm_setzero_si128 (),
8698 (__mmask8) __U);
8701 extern __inline __m256i
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8705 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8706 (__v2di) __B,
8707 (__v4di)
8708 _mm256_setzero_si256 (),
8709 (__mmask8) -1);
8712 extern __inline __m256i
8713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8715 __m128i __B)
8717 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8718 (__v2di) __B,
8719 (__v4di) __W,
8720 (__mmask8) __U);
8723 extern __inline __m256i
8724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8725 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8727 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8728 (__v2di) __B,
8729 (__v4di)
8730 _mm256_setzero_si256 (),
8731 (__mmask8) __U);
8734 extern __inline __m128i
8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736 _mm_sra_epi64 (__m128i __A, __m128i __B)
8738 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8739 (__v2di) __B,
8740 (__v2di)
8741 _mm_setzero_di (),
8742 (__mmask8) -1);
8745 extern __inline __m128i
8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8748 __m128i __B)
8750 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8751 (__v2di) __B,
8752 (__v2di) __W,
8753 (__mmask8) __U);
8756 extern __inline __m128i
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8760 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8761 (__v2di) __B,
8762 (__v2di)
8763 _mm_setzero_di (),
8764 (__mmask8) __U);
8767 extern __inline __m128i
8768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8769 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8770 __m128i __B)
8772 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8773 (__v4si) __B,
8774 (__v4si) __W,
8775 (__mmask8) __U);
8778 extern __inline __m128i
8779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8782 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8783 (__v4si) __B,
8784 (__v4si)
8785 _mm_setzero_si128 (),
8786 (__mmask8) __U);
8789 extern __inline __m128i
8790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8791 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8792 __m128i __B)
8794 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8795 (__v2di) __B,
8796 (__v2di) __W,
8797 (__mmask8) __U);
8800 extern __inline __m128i
8801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8804 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8805 (__v2di) __B,
8806 (__v2di)
8807 _mm_setzero_di (),
8808 (__mmask8) __U);
8811 extern __inline __m256i
8812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8813 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8814 __m128i __B)
8816 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8817 (__v4si) __B,
8818 (__v8si) __W,
8819 (__mmask8) __U);
8822 extern __inline __m256i
8823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8824 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8826 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8827 (__v4si) __B,
8828 (__v8si)
8829 _mm256_setzero_si256 (),
8830 (__mmask8) __U);
8833 extern __inline __m256i
8834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8835 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8836 __m128i __B)
8838 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8839 (__v2di) __B,
8840 (__v4di) __W,
8841 (__mmask8) __U);
8844 extern __inline __m256i
8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8848 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8849 (__v2di) __B,
8850 (__v4di)
8851 _mm256_setzero_si256 (),
8852 (__mmask8) __U);
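/* Usage sketch (illustrative, not part of the upstream header): as in
   the unmasked SSE/AVX forms, the shift count for these sra/sll
   wrappers is taken from the low 64 bits of the __m128i operand and
   applied uniformly to every element.

     __m256i v = _mm256_set1_epi32 (-16);
     __m128i cnt = _mm_cvtsi32_si128 (2);
     __m256i r = _mm256_maskz_sra_epi32 (0x0f, v, cnt);

   Lanes 0-3 of r are -4 (arithmetic shift); lanes 4-7 are zeroed.  */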
8855 extern __inline __m256
8856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8858 __m256 __Y)
8860 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8861 (__v8si) __X,
8862 (__v8sf) __W,
8863 (__mmask8) __U);
8866 extern __inline __m256
8867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8868 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8870 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8871 (__v8si) __X,
8872 (__v8sf)
8873 _mm256_setzero_ps (),
8874 (__mmask8) __U);
8877 extern __inline __m256d
8878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8881 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8882 (__v4di) __X,
8883 (__v4df)
8884 _mm256_setzero_pd (),
8885 (__mmask8) -1);
8888 extern __inline __m256d
8889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8890 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8891 __m256d __Y)
8893 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8894 (__v4di) __X,
8895 (__v4df) __W,
8896 (__mmask8) __U);
8899 extern __inline __m256d
8900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8903 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8904 (__v4di) __X,
8905 (__v4df)
8906 _mm256_setzero_pd (),
8907 (__mmask8) __U);
8910 extern __inline __m256d
8911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8912 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8913 __m256i __C)
8915 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8916 (__v4di) __C,
8917 (__v4df) __W,
8918 (__mmask8)
8919 __U);
8922 extern __inline __m256d
8923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8926 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8927 (__v4di) __C,
8928 (__v4df)
8929 _mm256_setzero_pd (),
8930 (__mmask8)
8931 __U);
8934 extern __inline __m256
8935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8937 __m256i __C)
8939 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8940 (__v8si) __C,
8941 (__v8sf) __W,
8942 (__mmask8) __U);
8945 extern __inline __m256
8946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8949 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8950 (__v8si) __C,
8951 (__v8sf)
8952 _mm256_setzero_ps (),
8953 (__mmask8) __U);
8956 extern __inline __m128d
8957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8958 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8959 __m128i __C)
8961 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8962 (__v2di) __C,
8963 (__v2df) __W,
8964 (__mmask8) __U);
8967 extern __inline __m128d
8968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8969 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8971 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8972 (__v2di) __C,
8973 (__v2df)
8974 _mm_setzero_pd (),
8975 (__mmask8) __U);
8978 extern __inline __m128
8979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8980 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8981 __m128i __C)
8983 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8984 (__v4si) __C,
8985 (__v4sf) __W,
8986 (__mmask8) __U);
8989 extern __inline __m128
8990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8993 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8994 (__v4si) __C,
8995 (__v4sf)
8996 _mm_setzero_ps (),
8997 (__mmask8) __U);
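/* Usage sketch (illustrative, not part of the upstream header):
   permutexvar indexes across the whole 256-bit vector, while
   permutevar (VPERMILPD/PS) only rearranges within each 128-bit lane,
   as in AVX.  Note the index vector is the first argument.

     __m256d y = _mm256_setr_pd (10.0, 11.0, 12.0, 13.0);
     __m256i idx = _mm256_setr_epi64x (3, 3, 0, 1);
     __m256d r = _mm256_permutexvar_pd (idx, y);

   r is { 13.0, 13.0, 10.0, 11.0 }.  */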
9000 extern __inline __m256i
9001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9004 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9005 (__v8si) __B,
9006 (__v8si)
9007 _mm256_setzero_si256 (),
9008 __M);
9011 extern __inline __m256i
9012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9013 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9015 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9016 (__v4di) __X,
9017 (__v4di)
9018 _mm256_setzero_si256 (),
9019 __M);
9022 extern __inline __m256i
9023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9024 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9025 __m256i __B)
9027 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9028 (__v8si) __B,
9029 (__v8si) __W, __M);
9032 extern __inline __m128i
9033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9034 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9036 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9037 (__v4si) __B,
9038 (__v4si)
9039 _mm_setzero_si128 (),
9040 __M);
9043 extern __inline __m128i
9044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045 _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
9046 __m128i __B)
9048 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9049 (__v4si) __B,
9050 (__v4si) __W, __M);
9053 extern __inline __m256i
9054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9056 __m256i __Y)
9058 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9059 (__v8si) __Y,
9060 (__v4di) __W, __M);
9063 extern __inline __m256i
9064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9065 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9067 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9068 (__v8si) __Y,
9069 (__v4di)
9070 _mm256_setzero_si256 (),
9071 __M);
9074 extern __inline __m128i
9075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9076 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9077 __m128i __Y)
9079 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9080 (__v4si) __Y,
9081 (__v2di) __W, __M);
9084 extern __inline __m128i
9085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9088 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9089 (__v4si) __Y,
9090 (__v2di)
9091 _mm_setzero_si128 (),
9092 __M);
9095 extern __inline __m256i
9096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9098 __m256i __Y)
9100 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9101 (__v4di) __X,
9102 (__v4di) __W,
9103 __M);
9106 extern __inline __m256i
9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 __m256i __Y)
9111 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9112 (__v8si) __Y,
9113 (__v4di) __W, __M);
9116 extern __inline __m256i
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9120 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9121 (__v8si) __X,
9122 (__v8si)
9123 _mm256_setzero_si256 (),
9124 __M);
9127 extern __inline __m256i
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9131 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9132 (__v8si) __Y,
9133 (__v4di)
9134 _mm256_setzero_si256 (),
9135 __M);
9138 extern __inline __m128i
9139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9141 __m128i __Y)
9143 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9144 (__v4si) __Y,
9145 (__v2di) __W, __M);
9148 extern __inline __m128i
9149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9152 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9153 (__v4si) __Y,
9154 (__v2di)
9155 _mm_setzero_si128 (),
9156 __M);
9159 extern __inline __m256i
9160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9162 __m256i __Y)
9164 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9165 (__v8si) __X,
9166 (__v8si) __W,
9167 __M);
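/* Usage sketch (illustrative, not part of the upstream header): like
   PMULDQ/PMULUDQ, the mul_epi32/mul_epu32 wrappers multiply only the
   even-numbered 32-bit elements and return full 64-bit products, so
   the mask selects 64-bit result lanes.

     __m256i x = _mm256_set1_epi32 (0x10000);
     __m256i p = _mm256_maskz_mul_epi32 (0x3, x, x);

   64-bit lanes 0-1 of p hold 1LL << 32; lanes 2-3 are zeroed.  */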
9170 #ifdef __OPTIMIZE__
9171 extern __inline __m256i
9172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9173 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9174 __m256i __X, const int __I)
9176 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9177 __I,
9178 (__v4di) __W,
9179 (__mmask8) __M);
9182 extern __inline __m256i
9183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9184 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9186 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9187 __I,
9188 (__v4di)
9189 _mm256_setzero_si256 (),
9190 (__mmask8) __M);
9193 extern __inline __m256d
9194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9195 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9196 __m256d __B, const int __imm)
9198 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9199 (__v4df) __B, __imm,
9200 (__v4df) __W,
9201 (__mmask8) __U);
9204 extern __inline __m256d
9205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9207 const int __imm)
9209 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9210 (__v4df) __B, __imm,
9211 (__v4df)
9212 _mm256_setzero_pd (),
9213 (__mmask8) __U);
9216 extern __inline __m128d
9217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9218 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9219 __m128d __B, const int __imm)
9221 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9222 (__v2df) __B, __imm,
9223 (__v2df) __W,
9224 (__mmask8) __U);
9227 extern __inline __m128d
9228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9229 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9230 const int __imm)
9232 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9233 (__v2df) __B, __imm,
9234 (__v2df)
9235 _mm_setzero_pd (),
9236 (__mmask8) __U);
9239 extern __inline __m256
9240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9241 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9242 __m256 __B, const int __imm)
9244 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9245 (__v8sf) __B, __imm,
9246 (__v8sf) __W,
9247 (__mmask8) __U);
9250 extern __inline __m256
9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9253 const int __imm)
9255 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9256 (__v8sf) __B, __imm,
9257 (__v8sf)
9258 _mm256_setzero_ps (),
9259 (__mmask8) __U);
9262 extern __inline __m128
9263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9264 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9265 const int __imm)
9267 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9268 (__v4sf) __B, __imm,
9269 (__v4sf) __W,
9270 (__mmask8) __U);
9273 extern __inline __m128
9274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9275 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9276 const int __imm)
9278 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9279 (__v4sf) __B, __imm,
9280 (__v4sf)
9281 _mm_setzero_ps (),
9282 (__mmask8) __U);
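/* Usage sketch (illustrative, not part of the upstream header): every
   intrinsic in this __OPTIMIZE__ block takes a compile-time immediate;
   the non-optimizing build path typically supplies equivalent macros
   further down, so __imm must still be a constant expression.

     __m256d a = _mm256_setr_pd (0.0, 1.0, 2.0, 3.0);
     __m256d b = _mm256_setr_pd (4.0, 5.0, 6.0, 7.0);
     __m256d r = _mm256_maskz_shuffle_pd (0xf, a, b, 0x5);

   r is { 1.0, 4.0, 3.0, 6.0 }: each immediate bit picks the low or
   high element of the corresponding source within its 128-bit lane.  */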
9285 extern __inline __m256i
9286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9287 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9289 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9290 (__v4si) __B,
9291 __imm,
9292 (__v8si)
9293 _mm256_setzero_si256 (),
9294 (__mmask8) -1);
9298 extern __inline __m256i
9299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9300 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9301 __m128i __B, const int __imm)
9303 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9304 (__v4si) __B,
9305 __imm,
9306 (__v8si) __W,
9307 (__mmask8)
9308 __U);
9311 extern __inline __m256i
9312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9313 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9314 const int __imm)
9316 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9317 (__v4si) __B,
9318 __imm,
9319 (__v8si)
9320 _mm256_setzero_si256 (),
9321 (__mmask8)
9322 __U);
9325 extern __inline __m256
9326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9327 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9329 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9330 (__v4sf) __B,
9331 __imm,
9332 (__v8sf)
9333 _mm256_setzero_ps (),
9334 (__mmask8) -1);
9337 extern __inline __m256
9338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9340 __m128 __B, const int __imm)
9342 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9343 (__v4sf) __B,
9344 __imm,
9345 (__v8sf) __W,
9346 (__mmask8) __U);
9349 extern __inline __m256
9350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9351 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9352 const int __imm)
9354 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9355 (__v4sf) __B,
9356 __imm,
9357 (__v8sf)
9358 _mm256_setzero_ps (),
9359 (__mmask8) __U);
9362 extern __inline __m128i
9363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9366 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9367 __imm,
9368 (__v4si)
9369 _mm_setzero_si128 (),
9370 (__mmask8) -1);
9374 extern __inline __m128i
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9377 const int __imm)
9379 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9380 __imm,
9381 (__v4si) __W,
9382 (__mmask8)
9383 __U);
9386 extern __inline __m128i
9387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9389 const int __imm)
9391 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9392 __imm,
9393 (__v4si)
9394 _mm_setzero_si128 (),
9395 (__mmask8)
9396 __U);
9399 extern __inline __m128
9400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
9403 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9404 __imm,
9405 (__v4sf)
9406 _mm_setzero_ps (),
9407 (__mmask8) -1);
9411 extern __inline __m128
9412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9414 const int __imm)
9416 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9417 __imm,
9418 (__v4sf) __W,
9419 (__mmask8)
9420 __U);
9423 extern __inline __m128
9424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9426 const int __imm)
9428 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9429 __imm,
9430 (__v4sf)
9431 _mm_setzero_ps (),
9432 (__mmask8)
9433 __U);
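/* Usage sketch (illustrative, not part of the upstream header): the
   32x4 insert/extract wrappers move one 128-bit block in or out of a
   256-bit vector; the immediate selects the low (0) or high (1) half.

     __m256i v = _mm256_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7);
     __m128i hi = _mm256_extracti32x4_epi32 (v, 1);
     __m256i w = _mm256_inserti32x4 (v, hi, 0);

   hi is { 4, 5, 6, 7 } and w is { 4, 5, 6, 7, 4, 5, 6, 7 }.  */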
9436 extern __inline __m256i
9437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9440 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9441 (__v4di) __B,
9442 __imm,
9443 (__v4di)
9444 _mm256_setzero_si256 (),
9445 (__mmask8) -1);
9448 extern __inline __m256i
9449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9451 __m256i __B, const int __imm)
9453 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9454 (__v4di) __B,
9455 __imm,
9456 (__v4di) __W,
9457 (__mmask8) __U);
9460 extern __inline __m256i
9461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9463 const int __imm)
9465 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9466 (__v4di) __B,
9467 __imm,
9468 (__v4di)
9469 _mm256_setzero_si256 (),
9470 (__mmask8) __U);
9473 extern __inline __m256i
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9477 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9478 (__v8si) __B,
9479 __imm,
9480 (__v8si)
9481 _mm256_setzero_si256 (),
9482 (__mmask8) -1);
9485 extern __inline __m256i
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9488 __m256i __B, const int __imm)
9490 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9491 (__v8si) __B,
9492 __imm,
9493 (__v8si) __W,
9494 (__mmask8) __U);
9497 extern __inline __m256i
9498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9500 const int __imm)
9502 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9503 (__v8si) __B,
9504 __imm,
9505 (__v8si)
9506 _mm256_setzero_si256 (),
9507 (__mmask8) __U);
9510 extern __inline __m256d
9511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9514 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9515 (__v4df) __B,
9516 __imm,
9517 (__v4df)
9518 _mm256_setzero_pd (),
9519 (__mmask8) -1);
9522 extern __inline __m256d
9523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9525 __m256d __B, const int __imm)
9527 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9528 (__v4df) __B,
9529 __imm,
9530 (__v4df) __W,
9531 (__mmask8) __U);
9534 extern __inline __m256d
9535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9537 const int __imm)
9539 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9540 (__v4df) __B,
9541 __imm,
9542 (__v4df)
9543 _mm256_setzero_pd (),
9544 (__mmask8) __U);
9547 extern __inline __m256
9548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9549 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9551 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9552 (__v8sf) __B,
9553 __imm,
9554 (__v8sf)
9555 _mm256_setzero_ps (),
9556 (__mmask8) -1);
9559 extern __inline __m256
9560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9562 __m256 __B, const int __imm)
9564 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9565 (__v8sf) __B,
9566 __imm,
9567 (__v8sf) __W,
9568 (__mmask8) __U);
9571 extern __inline __m256
9572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9574 const int __imm)
9576 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9577 (__v8sf) __B,
9578 __imm,
9579 (__v8sf)
9580 _mm256_setzero_ps (),
9581 (__mmask8) __U);
9584 extern __inline __m256d
9585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9587 const int __imm)
9589 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9590 (__v4df) __B,
9591 (__v4di) __C,
9592 __imm,
9593 (__mmask8) -1);
9596 extern __inline __m256d
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9599 __m256i __C, const int __imm)
9601 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9602 (__v4df) __B,
9603 (__v4di) __C,
9604 __imm,
9605 (__mmask8) __U);
9608 extern __inline __m256d
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9611 __m256i __C, const int __imm)
9613 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9614 (__v4df) __B,
9615 (__v4di) __C,
9616 __imm,
9617 (__mmask8) __U);
9620 extern __inline __m256
9621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9623 const int __imm)
9625 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9626 (__v8sf) __B,
9627 (__v8si) __C,
9628 __imm,
9629 (__mmask8) -1);
9632 extern __inline __m256
9633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9635 __m256i __C, const int __imm)
9637 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9638 (__v8sf) __B,
9639 (__v8si) __C,
9640 __imm,
9641 (__mmask8) __U);
9644 extern __inline __m256
9645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9647 __m256i __C, const int __imm)
9649 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9650 (__v8sf) __B,
9651 (__v8si) __C,
9652 __imm,
9653 (__mmask8) __U);
9656 extern __inline __m128d
9657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9658 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9659 const int __imm)
9661 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9662 (__v2df) __B,
9663 (__v2di) __C,
9664 __imm,
9665 (__mmask8) -1);
9668 extern __inline __m128d
9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9671 __m128i __C, const int __imm)
9673 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9674 (__v2df) __B,
9675 (__v2di) __C,
9676 __imm,
9677 (__mmask8) __U);
9680 extern __inline __m128d
9681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9682 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9683 __m128i __C, const int __imm)
9685 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9686 (__v2df) __B,
9687 (__v2di) __C,
9688 __imm,
9689 (__mmask8) __U);
9692 extern __inline __m128
9693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9696 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9697 (__v4sf) __B,
9698 (__v4si) __C,
9699 __imm,
9700 (__mmask8) -1);
9703 extern __inline __m128
9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9706 __m128i __C, const int __imm)
9708 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9709 (__v4sf) __B,
9710 (__v4si) __C,
9711 __imm,
9712 (__mmask8) __U);
9715 extern __inline __m128
9716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9717 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9718 __m128i __C, const int __imm)
9720 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9721 (__v4sf) __B,
9722 (__v4si) __C,
9723 __imm,
9724 (__mmask8) __U);
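/* Usage sketch (illustrative only): the fixupimm intrinsics classify each
   element of the second vector operand, pick the 4-bit token that the
   matching field of the integer table operand assigns to that class, and
   produce the special value the token encodes (token 0 keeps the
   destination element); the immediate controls which exceptions may be
   signalled.  A minimal sketch, assuming <immintrin.h> is included and the
   code is built with -mavx512vl; the all-zero table is only a placeholder
   that maps every class to token 0, so dst is returned unchanged:

     __m128d
     fixup_sketch (__m128d dst, __m128d src)
     {
       __m128i table = _mm_setzero_si128 ();
       return _mm_fixupimm_pd (dst, src, table, 0);
     }  */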
9727 extern __inline __m256i
9728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9730 const int __imm)
9732 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9733 (__v8si) __W,
9734 (__mmask8) __U);
9737 extern __inline __m256i
9738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9741 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9742 (__v8si)
9743 _mm256_setzero_si256 (),
9744 (__mmask8) __U);
9747 extern __inline __m128i
9748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9750 const int __imm)
9752 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9753 (__v4si) __W,
9754 (__mmask8) __U);
9757 extern __inline __m128i
9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9761 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9762 (__v4si)
9763 _mm_setzero_si128 (),
9764 (__mmask8) __U);
9767 extern __inline __m256i
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9770 const int __imm)
9772 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9773 (__v4di) __W,
9774 (__mmask8) __U);
9777 extern __inline __m256i
9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9781 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9782 (__v4di)
9783 _mm256_setzero_si256 (),
9784 (__mmask8) __U);
9787 extern __inline __m128i
9788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9789 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9790 const int __imm)
9792 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9793 (__v2di) __W,
9794 (__mmask8) __U);
9797 extern __inline __m128i
9798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9799 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9801 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9802 (__v2di)
9803 _mm_setzero_si128 (),
9804 (__mmask8) __U);
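/* Usage sketch (illustrative only): the masked srli intrinsics shift each
   32- or 64-bit element right by an immediate count, inserting zeros, then
   either merge the result into __W or zero the element according to the
   write mask.  A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Keep only the top byte of each dword; lanes whose mask bit is
     // clear become zero because the maskz form is used.
     __m256i
     high_bytes (__mmask8 m, __m256i v)
     {
       return _mm256_maskz_srli_epi32 (m, v, 24);
     }  */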
9807 extern __inline __m256i
9808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9809 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9810 const int __imm)
9812 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9813 (__v4di) __B,
9814 (__v4di) __C, __imm,
9815 (__mmask8) -1);
9818 extern __inline __m256i
9819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9820 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9821 __m256i __B, __m256i __C,
9822 const int __imm)
9824 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9825 (__v4di) __B,
9826 (__v4di) __C, __imm,
9827 (__mmask8) __U);
9830 extern __inline __m256i
9831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9833 __m256i __B, __m256i __C,
9834 const int __imm)
9836 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9837 (__v4di) __B,
9838 (__v4di) __C,
9839 __imm,
9840 (__mmask8) __U);
9843 extern __inline __m256i
9844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9846 const int __imm)
9848 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9849 (__v8si) __B,
9850 (__v8si) __C, __imm,
9851 (__mmask8) -1);
9854 extern __inline __m256i
9855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9857 __m256i __B, __m256i __C,
9858 const int __imm)
9860 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9861 (__v8si) __B,
9862 (__v8si) __C, __imm,
9863 (__mmask8) __U);
9866 extern __inline __m256i
9867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9869 __m256i __B, __m256i __C,
9870 const int __imm)
9872 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9873 (__v8si) __B,
9874 (__v8si) __C,
9875 __imm,
9876 (__mmask8) __U);
9879 extern __inline __m128i
9880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9882 const int __imm)
9884 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9885 (__v2di) __B,
9886 (__v2di) __C, __imm,
9887 (__mmask8) -1);
9890 extern __inline __m128i
9891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9893 __m128i __B, __m128i __C, const int __imm)
9895 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9896 (__v2di) __B,
9897 (__v2di) __C, __imm,
9898 (__mmask8) __U);
9901 extern __inline __m128i
9902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9904 __m128i __B, __m128i __C, const int __imm)
9906 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9907 (__v2di) __B,
9908 (__v2di) __C,
9909 __imm,
9910 (__mmask8) __U);
9913 extern __inline __m128i
9914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9915 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9916 const int __imm)
9918 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9919 (__v4si) __B,
9920 (__v4si) __C, __imm,
9921 (__mmask8) -1);
9924 extern __inline __m128i
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9927 __m128i __B, __m128i __C, const int __imm)
9929 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9930 (__v4si) __B,
9931 (__v4si) __C, __imm,
9932 (__mmask8) __U);
9935 extern __inline __m128i
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9938 __m128i __B, __m128i __C, const int __imm)
9940 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9941 (__v4si) __B,
9942 (__v4si) __C,
9943 __imm,
9944 (__mmask8) __U);
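/* Usage sketch (illustrative only): VPTERNLOG treats __imm as a three-input
   truth table: bit ((a << 2) | (b << 1) | c) of the immediate is the result
   bit for input bits a, b and c taken from the first, second and third
   operand.  Hence 0x96 is a three-way XOR and 0xCA a bitwise select
   (a ? b : c).  A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     __m256i
     xor3 (__m256i a, __m256i b, __m256i c)
     {
       return _mm256_ternarylogic_epi64 (a, b, c, 0x96);
     }

     __m256i
     bit_select (__m256i cond, __m256i t, __m256i f)
     {
       return _mm256_ternarylogic_epi64 (cond, t, f, 0xCA);
     }  */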
9947 extern __inline __m256
9948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949 _mm256_roundscale_ps (__m256 __A, const int __imm)
9951 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9952 __imm,
9953 (__v8sf)
9954 _mm256_setzero_ps (),
9955 (__mmask8) -1);
9958 extern __inline __m256
9959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9961 const int __imm)
9963 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9964 __imm,
9965 (__v8sf) __W,
9966 (__mmask8) __U);
9969 extern __inline __m256
9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9973 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9974 __imm,
9975 (__v8sf)
9976 _mm256_setzero_ps (),
9977 (__mmask8) __U);
9980 extern __inline __m256d
9981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9982 _mm256_roundscale_pd (__m256d __A, const int __imm)
9984 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9985 __imm,
9986 (__v4df)
9987 _mm256_setzero_pd (),
9988 (__mmask8) -1);
9991 extern __inline __m256d
9992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
9994 const int __imm)
9996 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9997 __imm,
9998 (__v4df) __W,
9999 (__mmask8) __U);
10002 extern __inline __m256d
10003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10004 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10006 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10007 __imm,
10008 (__v4df)
10009 _mm256_setzero_pd (),
10010 (__mmask8) __U);
10013 extern __inline __m128
10014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015 _mm_roundscale_ps (__m128 __A, const int __imm)
10017 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10018 __imm,
10019 (__v4sf)
10020 _mm_setzero_ps (),
10021 (__mmask8) -1);
10024 extern __inline __m128
10025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10027 const int __imm)
10029 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10030 __imm,
10031 (__v4sf) __W,
10032 (__mmask8) __U);
10035 extern __inline __m128
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10039 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10040 __imm,
10041 (__v4sf)
10042 _mm_setzero_ps (),
10043 (__mmask8) __U);
10046 extern __inline __m128d
10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048 _mm_roundscale_pd (__m128d __A, const int __imm)
10050 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10051 __imm,
10052 (__v2df)
10053 _mm_setzero_pd (),
10054 (__mmask8) -1);
10057 extern __inline __m128d
10058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10059 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10060 const int __imm)
10062 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10063 __imm,
10064 (__v2df) __W,
10065 (__mmask8) __U);
10068 extern __inline __m128d
10069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10070 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10072 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10073 __imm,
10074 (__v2df)
10075 _mm_setzero_pd (),
10076 (__mmask8) __U);
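/* Usage sketch (illustrative only): roundscale rounds each element to M
   fractional bits (a multiple of 2^-M), where M is the immediate's upper
   nibble; the low nibble selects the rounding mode (0 nearest-even, 1 down,
   2 up, 3 truncate, bit 2 = use MXCSR, bit 3 = suppress precision
   exceptions).  A minimal sketch with <immintrin.h> included and -mavx512vl
   enabled:

     // Vector floor(): M = 0, round toward minus infinity.
     __m256d
     floor_pd (__m256d x)
     {
       return _mm256_roundscale_pd (x, 0x01);
     }

     // Round to the nearest multiple of 1/16: M = 4, nearest-even.
     __m256
     to_sixteenths (__m256 x)
     {
       return _mm256_roundscale_ps (x, (4 << 4) | 0x00);
     }  */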
10079 extern __inline __m256
10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10082 _MM_MANTISSA_SIGN_ENUM __C)
10084 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10085 (__C << 2) | __B,
10086 (__v8sf)
10087 _mm256_setzero_ps (),
10088 (__mmask8) -1);
10091 extern __inline __m256
10092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10093 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10094 _MM_MANTISSA_NORM_ENUM __B,
10095 _MM_MANTISSA_SIGN_ENUM __C)
10097 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10098 (__C << 2) | __B,
10099 (__v8sf) __W,
10100 (__mmask8) __U);
10103 extern __inline __m256
10104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10106 _MM_MANTISSA_NORM_ENUM __B,
10107 _MM_MANTISSA_SIGN_ENUM __C)
10109 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10110 (__C << 2) | __B,
10111 (__v8sf)
10112 _mm256_setzero_ps (),
10113 (__mmask8) __U);
10116 extern __inline __m128
10117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10118 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10119 _MM_MANTISSA_SIGN_ENUM __C)
10121 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10122 (__C << 2) | __B,
10123 (__v4sf)
10124 _mm_setzero_ps (),
10125 (__mmask8) -1);
10128 extern __inline __m128
10129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10130 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10131 _MM_MANTISSA_NORM_ENUM __B,
10132 _MM_MANTISSA_SIGN_ENUM __C)
10134 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10135 (__C << 2) | __B,
10136 (__v4sf) __W,
10137 (__mmask8) __U);
10140 extern __inline __m128
10141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10143 _MM_MANTISSA_NORM_ENUM __B,
10144 _MM_MANTISSA_SIGN_ENUM __C)
10146 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10147 (__C << 2) | __B,
10148 (__v4sf)
10149 _mm_setzero_ps (),
10150 (__mmask8) __U);
10153 extern __inline __m256d
10154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10155 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10156 _MM_MANTISSA_SIGN_ENUM __C)
10158 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10159 (__C << 2) | __B,
10160 (__v4df)
10161 _mm256_setzero_pd (),
10162 (__mmask8) -1);
10165 extern __inline __m256d
10166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10167 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10168 _MM_MANTISSA_NORM_ENUM __B,
10169 _MM_MANTISSA_SIGN_ENUM __C)
10171 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10172 (__C << 2) | __B,
10173 (__v4df) __W,
10174 (__mmask8) __U);
10177 extern __inline __m256d
10178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10180 _MM_MANTISSA_NORM_ENUM __B,
10181 _MM_MANTISSA_SIGN_ENUM __C)
10183 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10184 (__C << 2) | __B,
10185 (__v4df)
10186 _mm256_setzero_pd (),
10187 (__mmask8) __U);
10190 extern __inline __m128d
10191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10192 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10193 _MM_MANTISSA_SIGN_ENUM __C)
10195 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10196 (__C << 2) | __B,
10197 (__v2df)
10198 _mm_setzero_pd (),
10199 (__mmask8) -1);
10202 extern __inline __m128d
10203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10205 _MM_MANTISSA_NORM_ENUM __B,
10206 _MM_MANTISSA_SIGN_ENUM __C)
10208 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10209 (__C << 2) | __B,
10210 (__v2df) __W,
10211 (__mmask8) __U);
10214 extern __inline __m128d
10215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10216 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10217 _MM_MANTISSA_NORM_ENUM __B,
10218 _MM_MANTISSA_SIGN_ENUM __C)
10220 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10221 (__C << 2) | __B,
10222 (__v2df)
10223 _mm_setzero_pd (),
10224 (__mmask8) __U);
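/* Usage sketch (illustrative only): getmant returns the mantissa of each
   element normalized into the interval chosen by the _MM_MANTISSA_NORM_ENUM
   argument, with the sign handled per the _MM_MANTISSA_SIGN_ENUM argument;
   together with getexp this splits a value into mantissa and exponent.
   A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Mantissa in [1, 2) with the sign forced positive.
     __m256d
     mantissa_pd (__m256d x)
     {
       return _mm256_getmant_pd (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero);
     }  */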
10227 extern __inline __m256
10228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10230 __m256i __index, float const *__addr,
10231 int __scale)
10233 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10234 __addr,
10235 (__v8si) __index,
10236 __mask, __scale);
10239 extern __inline __m128
10240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10242 __m128i __index, float const *__addr,
10243 int __scale)
10245 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10246 __addr,
10247 (__v4si) __index,
10248 __mask, __scale);
10251 extern __inline __m256d
10252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10253 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10254 __m128i __index, double const *__addr,
10255 int __scale)
10257 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10258 __addr,
10259 (__v4si) __index,
10260 __mask, __scale);
10263 extern __inline __m128d
10264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10266 __m128i __index, double const *__addr,
10267 int __scale)
10269 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10270 __addr,
10271 (__v4si) __index,
10272 __mask, __scale);
10275 extern __inline __m128
10276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10278 __m256i __index, float const *__addr,
10279 int __scale)
10281 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10282 __addr,
10283 (__v4di) __index,
10284 __mask, __scale);
10287 extern __inline __m128
10288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10289 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10290 __m128i __index, float const *__addr,
10291 int __scale)
10293 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10294 __addr,
10295 (__v2di) __index,
10296 __mask, __scale);
10299 extern __inline __m256d
10300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10301 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10302 __m256i __index, double const *__addr,
10303 int __scale)
10305 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10306 __addr,
10307 (__v4di) __index,
10308 __mask, __scale);
10311 extern __inline __m128d
10312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10313 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10314 __m128i __index, double const *__addr,
10315 int __scale)
10317 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10318 __addr,
10319 (__v2di) __index,
10320 __mask, __scale);
10323 extern __inline __m256i
10324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10325 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10326 __m256i __index, int const *__addr,
10327 int __scale)
10329 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10330 __addr,
10331 (__v8si) __index,
10332 __mask, __scale);
10335 extern __inline __m128i
10336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10338 __m128i __index, int const *__addr,
10339 int __scale)
10341 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10342 __addr,
10343 (__v4si) __index,
10344 __mask, __scale);
10347 extern __inline __m256i
10348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10350 __m128i __index, long long const *__addr,
10351 int __scale)
10353 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10354 __addr,
10355 (__v4si) __index,
10356 __mask, __scale);
10359 extern __inline __m128i
10360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10362 __m128i __index, long long const *__addr,
10363 int __scale)
10365 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10366 __addr,
10367 (__v4si) __index,
10368 __mask, __scale);
10371 extern __inline __m128i
10372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10373 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10374 __m256i __index, int const *__addr,
10375 int __scale)
10377 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10378 __addr,
10379 (__v4di) __index,
10380 __mask, __scale);
10383 extern __inline __m128i
10384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10385 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10386 __m128i __index, int const *__addr,
10387 int __scale)
10389 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10390 __addr,
10391 (__v2di) __index,
10392 __mask, __scale);
10395 extern __inline __m256i
10396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10397 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10398 __m256i __index, long long const *__addr,
10399 int __scale)
10401 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10402 __addr,
10403 (__v4di) __index,
10404 __mask, __scale);
10407 extern __inline __m128i
10408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10409 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10410 __m128i __index, long long const *__addr,
10411 int __scale)
10413 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10414 __addr,
10415 (__v2di) __index,
10416 __mask, __scale);
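/* Usage sketch (illustrative only): the masked gathers load one element
   from __addr + __index[i] * __scale for every set mask bit and keep the
   element of __v1_old where the bit is clear; __scale must be 1, 2, 4 or 8.
   A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Gather eight floats through 32-bit indices, leaving masked-off
     // lanes at 0.0f.
     __m256
     gather_masked (const float *base, __m256i idx, __mmask8 m)
     {
       return _mm256_mmask_i32gather_ps (_mm256_setzero_ps (), m,
                                         idx, base, 4);
     }  */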
10419 extern __inline void
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm256_i32scatter_ps (float *__addr, __m256i __index,
10422 __m256 __v1, const int __scale)
10424 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10425 (__v8si) __index, (__v8sf) __v1,
10426 __scale);
10429 extern __inline void
10430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431 _mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10432 __m256i __index, __m256 __v1,
10433 const int __scale)
10435 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10436 (__v8sf) __v1, __scale);
10439 extern __inline void
10440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441 _mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10442 const int __scale)
10444 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10445 (__v4si) __index, (__v4sf) __v1,
10446 __scale);
10449 extern __inline void
10450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451 _mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10452 __m128i __index, __m128 __v1,
10453 const int __scale)
10455 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10456 (__v4sf) __v1, __scale);
10459 extern __inline void
10460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461 _mm256_i32scatter_pd (double *__addr, __m128i __index,
10462 __m256d __v1, const int __scale)
10464 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10465 (__v4si) __index, (__v4df) __v1,
10466 __scale);
10469 extern __inline void
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10472 __m128i __index, __m256d __v1,
10473 const int __scale)
10475 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10476 (__v4df) __v1, __scale);
10479 extern __inline void
10480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10481 _mm_i32scatter_pd (double *__addr, __m128i __index,
10482 __m128d __v1, const int __scale)
10484 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10485 (__v4si) __index, (__v2df) __v1,
10486 __scale);
10489 extern __inline void
10490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491 _mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10492 __m128i __index, __m128d __v1,
10493 const int __scale)
10495 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10496 (__v2df) __v1, __scale);
10499 extern __inline void
10500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10501 _mm256_i64scatter_ps (float *__addr, __m256i __index,
10502 __m128 __v1, const int __scale)
10504 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10505 (__v4di) __index, (__v4sf) __v1,
10506 __scale);
10509 extern __inline void
10510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10511 _mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10512 __m256i __index, __m128 __v1,
10513 const int __scale)
10515 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10516 (__v4sf) __v1, __scale);
10519 extern __inline void
10520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10521 _mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10522 const int __scale)
10524 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10525 (__v2di) __index, (__v4sf) __v1,
10526 __scale);
10529 extern __inline void
10530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10531 _mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10532 __m128i __index, __m128 __v1,
10533 const int __scale)
10535 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10536 (__v4sf) __v1, __scale);
10539 extern __inline void
10540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10541 _mm256_i64scatter_pd (double *__addr, __m256i __index,
10542 __m256d __v1, const int __scale)
10544 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10545 (__v4di) __index, (__v4df) __v1,
10546 __scale);
10549 extern __inline void
10550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10551 _mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10552 __m256i __index, __m256d __v1,
10553 const int __scale)
10555 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10556 (__v4df) __v1, __scale);
10559 extern __inline void
10560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10561 _mm_i64scatter_pd (double *__addr, __m128i __index,
10562 __m128d __v1, const int __scale)
10564 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10565 (__v2di) __index, (__v2df) __v1,
10566 __scale);
10569 extern __inline void
10570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10571 _mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10572 __m128i __index, __m128d __v1,
10573 const int __scale)
10575 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10576 (__v2df) __v1, __scale);
10579 extern __inline void
10580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10581 _mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10582 __m256i __v1, const int __scale)
10584 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10585 (__v8si) __index, (__v8si) __v1,
10586 __scale);
10589 extern __inline void
10590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591 _mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10592 __m256i __index, __m256i __v1,
10593 const int __scale)
10595 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10596 (__v8si) __v1, __scale);
10599 extern __inline void
10600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601 _mm_i32scatter_epi32 (int *__addr, __m128i __index,
10602 __m128i __v1, const int __scale)
10604 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10605 (__v4si) __index, (__v4si) __v1,
10606 __scale);
10609 extern __inline void
10610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10611 _mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10612 __m128i __index, __m128i __v1,
10613 const int __scale)
10615 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10616 (__v4si) __v1, __scale);
10619 extern __inline void
10620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10621 _mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10622 __m256i __v1, const int __scale)
10624 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10625 (__v4si) __index, (__v4di) __v1,
10626 __scale);
10629 extern __inline void
10630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631 _mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10632 __m128i __index, __m256i __v1,
10633 const int __scale)
10635 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10636 (__v4di) __v1, __scale);
10639 extern __inline void
10640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641 _mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10642 __m128i __v1, const int __scale)
10644 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10645 (__v4si) __index, (__v2di) __v1,
10646 __scale);
10649 extern __inline void
10650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10651 _mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10652 __m128i __index, __m128i __v1,
10653 const int __scale)
10655 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10656 (__v2di) __v1, __scale);
10659 extern __inline void
10660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10661 _mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10662 __m128i __v1, const int __scale)
10664 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10665 (__v4di) __index, (__v4si) __v1,
10666 __scale);
10669 extern __inline void
10670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10671 _mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10672 __m256i __index, __m128i __v1,
10673 const int __scale)
10675 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10676 (__v4si) __v1, __scale);
10679 extern __inline void
10680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10681 _mm_i64scatter_epi32 (int *__addr, __m128i __index,
10682 __m128i __v1, const int __scale)
10684 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10685 (__v2di) __index, (__v4si) __v1,
10686 __scale);
10689 extern __inline void
10690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10691 _mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10692 __m128i __index, __m128i __v1,
10693 const int __scale)
10695 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10696 (__v4si) __v1, __scale);
10699 extern __inline void
10700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701 _mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10702 __m256i __v1, const int __scale)
10704 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10705 (__v4di) __index, (__v4di) __v1,
10706 __scale);
10709 extern __inline void
10710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10711 _mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10712 __m256i __index, __m256i __v1,
10713 const int __scale)
10715 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10716 (__v4di) __v1, __scale);
10719 extern __inline void
10720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10721 _mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10722 __m128i __v1, const int __scale)
10724 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10725 (__v2di) __index, (__v2di) __v1,
10726 __scale);
10729 extern __inline void
10730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731 _mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10732 __m128i __index, __m128i __v1,
10733 const int __scale)
10735 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10736 (__v2di) __v1, __scale);
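/* Usage sketch (illustrative only): the scatters store one element to
   __addr + __index[i] * __scale for every set mask bit (the unmasked forms
   use an all-ones mask) and leave memory untouched for clear bits; when two
   indices collide, the value from the highest lane is the one that remains.
   A minimal sketch with <immintrin.h> included and -mavx512vl enabled:

     // Store the selected doubles into a table addressed by 32-bit indices.
     void
     scatter_masked (double *table, __m128i idx, __m256d vals, __mmask8 m)
     {
       _mm256_mask_i32scatter_pd (table, m, idx, vals, 8);
     }  */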
10739 extern __inline __m256i
10740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10741 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10742 _MM_PERM_ENUM __mask)
10744 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10745 (__v8si) __W,
10746 (__mmask8) __U);
10749 extern __inline __m256i
10750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10751 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10752 _MM_PERM_ENUM __mask)
10754 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10755 (__v8si)
10756 _mm256_setzero_si256 (),
10757 (__mmask8) __U);
10760 extern __inline __m128i
10761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10763 _MM_PERM_ENUM __mask)
10765 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10766 (__v4si) __W,
10767 (__mmask8) __U);
10770 extern __inline __m128i
10771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10773 _MM_PERM_ENUM __mask)
10775 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10776 (__v4si)
10777 _mm_setzero_si128 (),
10778 (__mmask8) __U);
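/* Usage sketch (illustrative only): these are the write-masked forms of
   PSHUFD; the _MM_PERM_ENUM control selects one source dword for each
   destination dword, within each 128-bit lane for the 256-bit form.
   A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Broadcast dword 0 of each 128-bit lane into the lanes the mask
     // selects, zeroing the rest.
     __m256i
     bcast_lane_low (__mmask8 m, __m256i v)
     {
       return _mm256_maskz_shuffle_epi32 (m, v, _MM_PERM_AAAA);
     }  */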
10781 extern __inline __m256i
10782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783 _mm256_rol_epi32 (__m256i __A, const int __B)
10785 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10786 (__v8si)
10787 _mm256_setzero_si256 (),
10788 (__mmask8) -1);
10791 extern __inline __m256i
10792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10793 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10794 const int __B)
10796 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10797 (__v8si) __W,
10798 (__mmask8) __U);
10801 extern __inline __m256i
10802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10803 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10805 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10806 (__v8si)
10807 _mm256_setzero_si256 (),
10808 (__mmask8) __U);
10811 extern __inline __m128i
10812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813 _mm_rol_epi32 (__m128i __A, const int __B)
10815 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10816 (__v4si)
10817 _mm_setzero_si128 (),
10818 (__mmask8) -1);
10821 extern __inline __m128i
10822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10823 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10824 const int __B)
10826 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10827 (__v4si) __W,
10828 (__mmask8) __U);
10831 extern __inline __m128i
10832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10833 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10835 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10836 (__v4si)
10837 _mm_setzero_si128 (),
10838 (__mmask8) __U);
10841 extern __inline __m256i
10842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843 _mm256_ror_epi32 (__m256i __A, const int __B)
10845 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10846 (__v8si)
10847 _mm256_setzero_si256 (),
10848 (__mmask8) -1);
10851 extern __inline __m256i
10852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10854 const int __B)
10856 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10857 (__v8si) __W,
10858 (__mmask8) __U);
10861 extern __inline __m256i
10862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10865 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10866 (__v8si)
10867 _mm256_setzero_si256 (),
10868 (__mmask8) __U);
10871 extern __inline __m128i
10872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873 _mm_ror_epi32 (__m128i __A, const int __B)
10875 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10876 (__v4si)
10877 _mm_setzero_si128 (),
10878 (__mmask8) -1);
10881 extern __inline __m128i
10882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10883 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10884 const int __B)
10886 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10887 (__v4si) __W,
10888 (__mmask8) __U);
10891 extern __inline __m128i
10892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10893 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10895 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10896 (__v4si)
10897 _mm_setzero_si128 (),
10898 (__mmask8) __U);
10901 extern __inline __m256i
10902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903 _mm256_rol_epi64 (__m256i __A, const int __B)
10905 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10906 (__v4di)
10907 _mm256_setzero_si256 (),
10908 (__mmask8) -1);
10911 extern __inline __m256i
10912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10914 const int __B)
10916 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10917 (__v4di) __W,
10918 (__mmask8) __U);
10921 extern __inline __m256i
10922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10925 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10926 (__v4di)
10927 _mm256_setzero_si256 (),
10928 (__mmask8) __U);
10931 extern __inline __m128i
10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933 _mm_rol_epi64 (__m128i __A, const int __B)
10935 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10936 (__v2di)
10937 _mm_setzero_di (),
10938 (__mmask8) -1);
10941 extern __inline __m128i
10942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10944 const int __B)
10946 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10947 (__v2di) __W,
10948 (__mmask8) __U);
10951 extern __inline __m128i
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10955 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10956 (__v2di)
10957 _mm_setzero_di (),
10958 (__mmask8) __U);
10961 extern __inline __m256i
10962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963 _mm256_ror_epi64 (__m256i __A, const int __B)
10965 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10966 (__v4di)
10967 _mm256_setzero_si256 (),
10968 (__mmask8) -1);
10971 extern __inline __m256i
10972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10974 const int __B)
10976 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10977 (__v4di) __W,
10978 (__mmask8) __U);
10981 extern __inline __m256i
10982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10983 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
10985 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10986 (__v4di)
10987 _mm256_setzero_si256 (),
10988 (__mmask8) __U);
10991 extern __inline __m128i
10992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993 _mm_ror_epi64 (__m128i __A, const int __B)
10995 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10996 (__v2di)
10997 _mm_setzero_di (),
10998 (__mmask8) -1);
11001 extern __inline __m128i
11002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11003 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11004 const int __B)
11006 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11007 (__v2di) __W,
11008 (__mmask8) __U);
11011 extern __inline __m128i
11012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11015 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11016 (__v2di)
11017 _mm_setzero_di (),
11018 (__mmask8) __U);
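/* Usage sketch (illustrative only): the rotate intrinsics rotate each 32-
   or 64-bit element by an immediate count; a left rotate by N equals a
   right rotate by the element width minus N, and merge- and zero-masked
   variants are provided.  A minimal sketch, assuming <immintrin.h> and
   -mavx512vl:

     // Rotate every dword left by 8 bits.
     __m256i
     rotl8_epi32 (__m256i v)
     {
       return _mm256_rol_epi32 (v, 8);
     }  */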
11021 extern __inline __m128i
11022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11025 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11026 (__v4si) __B, __imm,
11027 (__v4si)
11028 _mm_setzero_si128 (),
11029 (__mmask8) -1);
11032 extern __inline __m128i
11033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11034 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11035 __m128i __B, const int __imm)
11037 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11038 (__v4si) __B, __imm,
11039 (__v4si) __W,
11040 (__mmask8) __U);
11043 extern __inline __m128i
11044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11045 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11046 const int __imm)
11048 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11049 (__v4si) __B, __imm,
11050 (__v4si)
11051 _mm_setzero_si128 (),
11052 (__mmask8) __U);
11055 extern __inline __m128i
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11059 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11060 (__v2di) __B, __imm,
11061 (__v2di)
11062 _mm_setzero_di (),
11063 (__mmask8) -1);
11066 extern __inline __m128i
11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11069 __m128i __B, const int __imm)
11071 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11072 (__v2di) __B, __imm,
11073 (__v2di) __W,
11074 (__mmask8) __U);
11077 extern __inline __m128i
11078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11079 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11080 const int __imm)
11082 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11083 (__v2di) __B, __imm,
11084 (__v2di)
11085 _mm_setzero_di (),
11086 (__mmask8) __U);
11089 extern __inline __m256i
11090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11093 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11094 (__v8si) __B, __imm,
11095 (__v8si)
11096 _mm256_setzero_si256 (),
11097 (__mmask8) -1);
11100 extern __inline __m256i
11101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11102 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11103 __m256i __B, const int __imm)
11105 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11106 (__v8si) __B, __imm,
11107 (__v8si) __W,
11108 (__mmask8) __U);
11111 extern __inline __m256i
11112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11113 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11114 const int __imm)
11116 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11117 (__v8si) __B, __imm,
11118 (__v8si)
11119 _mm256_setzero_si256 (),
11120 (__mmask8) __U);
11123 extern __inline __m256i
11124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11127 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11128 (__v4di) __B, __imm,
11129 (__v4di)
11130 _mm256_setzero_si256 (),
11131 (__mmask8) -1);
11134 extern __inline __m256i
11135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11137 __m256i __B, const int __imm)
11139 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11140 (__v4di) __B, __imm,
11141 (__v4di) __W,
11142 (__mmask8) __U);
11145 extern __inline __m256i
11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11148 const int __imm)
11150 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11151 (__v4di) __B, __imm,
11152 (__v4di)
11153 _mm256_setzero_si256 (),
11154 (__mmask8) __U);
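/* Usage sketch (illustrative only): VALIGND and VALIGNQ concatenate the
   sources with __A above __B, shift the pair right by __imm 32- or 64-bit
   elements and return the low vector; unlike SSSE3 PALIGNR, the 256-bit
   forms shift across the whole register rather than per 128-bit lane.
   A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Result elements 0..3 are { b[1], b[2], b[3], a[0] }.
     __m128i
     shift_in_one (__m128i a, __m128i b)
     {
       return _mm_alignr_epi32 (a, b, 1);
     }  */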
11157 extern __inline __m128i
11158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11160 const int __I)
11162 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11163 (__v8hi) __W,
11164 (__mmask8) __U);
11167 extern __inline __m128i
11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11171 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11172 (__v8hi)
11173 _mm_setzero_hi (),
11174 (__mmask8) __U);
11177 extern __inline __m128i
11178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11179 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11180 const int __I)
11182 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11183 (__v8hi) __W,
11184 (__mmask8) __U);
11187 extern __inline __m128i
11188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11189 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11191 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11192 (__v8hi)
11193 _mm_setzero_hi (),
11194 (__mmask8) __U);
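/* Usage sketch (illustrative only): the masked VCVTPS2PH forms convert
   packed single-precision values to half-precision storage format under
   control of a write mask; the immediate supplies the rounding control.
   A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Convert eight floats to eight half-floats, rounding to
     // nearest-even and taking masked-off result lanes from old.
     __m128i
     to_fp16 (__m128i old, __mmask8 m, __m256 x)
     {
       return _mm256_mask_cvtps_ph (old, m, x, _MM_FROUND_TO_NEAREST_INT);
     }  */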
11197 extern __inline __m256i
11198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11200 const int __imm)
11202 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11203 (__v8si) __W,
11204 (__mmask8) __U);
11207 extern __inline __m256i
11208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11209 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11211 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11212 (__v8si)
11213 _mm256_setzero_si256 (),
11214 (__mmask8) __U);
11217 extern __inline __m128i
11218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11219 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11220 const int __imm)
11222 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11223 (__v4si) __W,
11224 (__mmask8) __U);
11227 extern __inline __m128i
11228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11229 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11231 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11232 (__v4si)
11233 _mm_setzero_si128 (),
11234 (__mmask8) __U);
11237 extern __inline __m256i
11238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11239 _mm256_srai_epi64 (__m256i __A, const int __imm)
11241 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11242 (__v4di)
11243 _mm256_setzero_si256 (),
11244 (__mmask8) -1);
11247 extern __inline __m256i
11248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11249 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11250 const int __imm)
11252 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11253 (__v4di) __W,
11254 (__mmask8) __U);
11257 extern __inline __m256i
11258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11259 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11261 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11262 (__v4di)
11263 _mm256_setzero_si256 (),
11264 (__mmask8) __U);
11267 extern __inline __m128i
11268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269 _mm_srai_epi64 (__m128i __A, const int __imm)
11271 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11272 (__v2di)
11273 _mm_setzero_di (),
11274 (__mmask8) -1);
11277 extern __inline __m128i
11278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11280 const int __imm)
11282 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11283 (__v2di) __W,
11284 (__mmask8) __U);
11287 extern __inline __m128i
11288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11289 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11291 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11292 (__v2di)
11293 _mm_setzero_si128 (),
11294 (__mmask8) __U);
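/* Usage sketch (illustrative only): the srai intrinsics shift
   arithmetically, replicating the sign bit; the 64-bit form (VPSRAQ) has no
   AVX2 counterpart and first appears with AVX-512.  A minimal sketch,
   assuming <immintrin.h> and -mavx512vl:

     // Spread the sign of each quadword into a full-width mask.
     __m256i
     sign_mask_epi64 (__m256i v)
     {
       return _mm256_srai_epi64 (v, 63);
     }  */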
11297 extern __inline __m128i
11298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11299 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11301 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11302 (__v4si) __W,
11303 (__mmask8) __U);
11306 extern __inline __m128i
11307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11308 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11310 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11311 (__v4si)
11312 _mm_setzero_si128 (),
11313 (__mmask8) __U);
11316 extern __inline __m128i
11317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11318 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11320 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11321 (__v2di) __W,
11322 (__mmask8) __U);
11325 extern __inline __m128i
11326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11329 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11330 (__v2di)
11331 _mm_setzero_di (),
11332 (__mmask8) __U);
11335 extern __inline __m256i
11336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11337 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11338 int __B)
11340 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11341 (__v8si) __W,
11342 (__mmask8) __U);
11345 extern __inline __m256i
11346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11349 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11350 (__v8si)
11351 _mm256_setzero_si256 (),
11352 (__mmask8) __U);
11355 extern __inline __m256i
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11358 int __B)
11360 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11361 (__v4di) __W,
11362 (__mmask8) __U);
11365 extern __inline __m256i
11366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11369 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11370 (__v4di)
11371 _mm256_setzero_si256 (),
11372 (__mmask8) __U);
11375 extern __inline __m256d
11376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11377 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11378 const int __imm)
11380 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11381 (__v4df) __W,
11382 (__mmask8) __U);
11385 extern __inline __m256d
11386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11387 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11389 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11390 (__v4df)
11391 _mm256_setzero_pd (),
11392 (__mmask8) __U);
11395 extern __inline __m256d
11396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11398 const int __C)
11400 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11401 (__v4df) __W,
11402 (__mmask8) __U);
11405 extern __inline __m256d
11406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11407 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11409 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11410 (__v4df)
11411 _mm256_setzero_pd (),
11412 (__mmask8) __U);
11415 extern __inline __m128d
11416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11417 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11418 const int __C)
11420 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11421 (__v2df) __W,
11422 (__mmask8) __U);
11425 extern __inline __m128d
11426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11429 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11430 (__v2df)
11431 _mm_setzero_pd (),
11432 (__mmask8) __U);
11435 extern __inline __m256
11436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11437 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11438 const int __C)
11440 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11441 (__v8sf) __W,
11442 (__mmask8) __U);
11445 extern __inline __m256
11446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11447 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11449 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11450 (__v8sf)
11451 _mm256_setzero_ps (),
11452 (__mmask8) __U);
11455 extern __inline __m128
11456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11458 const int __C)
11460 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11461 (__v4sf) __W,
11462 (__mmask8) __U);
11465 extern __inline __m128
11466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11469 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11470 (__v4sf)
11471 _mm_setzero_ps (),
11472 (__mmask8) __U);
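/* Usage sketch (illustrative only): permutex_pd reorders 64-bit elements
   across the whole 256-bit register (VPERMPD) using two-bit selectors in
   the immediate, while permute_pd and permute_ps shuffle within each
   128-bit lane (VPERMILPD/VPERMILPS); all of the forms above take a write
   mask.  A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // Swap adjacent float pairs in the lanes the mask selects, keeping
     // the remaining lanes from w.
     __m256
     swap_pairs (__m256 w, __mmask8 m, __m256 x)
     {
       return _mm256_mask_permute_ps (w, m, x, 0xB1);
     }  */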
11475 extern __inline __m256d
11476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11479 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11480 (__v4df) __W,
11481 (__mmask8) __U);
11484 extern __inline __m256
11485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11486 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11488 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11489 (__v8sf) __W,
11490 (__mmask8) __U);
11493 extern __inline __m256i
11494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11495 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11497 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11498 (__v4di) __W,
11499 (__mmask8) __U);
11502 extern __inline __m256i
11503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11506 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11507 (__v8si) __W,
11508 (__mmask8) __U);
11511 extern __inline __m128d
11512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11513 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11515 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11516 (__v2df) __W,
11517 (__mmask8) __U);
11520 extern __inline __m128
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11524 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11525 (__v4sf) __W,
11526 (__mmask8) __U);
11529 extern __inline __m128i
11530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11531 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11533 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11534 (__v2di) __W,
11535 (__mmask8) __U);
11538 extern __inline __m128i
11539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11542 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11543 (__v4si) __W,
11544 (__mmask8) __U);
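/* Usage sketch (illustrative only): mask_blend returns the element of __W
   where the corresponding mask bit is set and the element of __A where it
   is clear, i.e. a per-element select driven directly by a mask register.
   A minimal sketch, assuming <immintrin.h> and -mavx512vl:

     // dst[i] = (m >> i) & 1 ? b[i] : a[i]
     __m256d
     select_pd (__mmask8 m, __m256d a, __m256d b)
     {
       return _mm256_mask_blend_pd (m, a, b);
     }  */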
11547 extern __inline __mmask8
11548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11549 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11551 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11552 (__v4di) __Y, __P,
11553 (__mmask8) -1);
11556 extern __inline __mmask8
11557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11558 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11560 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11561 (__v8si) __Y, __P,
11562 (__mmask8) -1);
11565 extern __inline __mmask8
11566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11569 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11570 (__v4di) __Y, __P,
11571 (__mmask8) -1);
11574 extern __inline __mmask8
11575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11578 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11579 (__v8si) __Y, __P,
11580 (__mmask8) -1);
11583 extern __inline __mmask8
11584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11585 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11587 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11588 (__v4df) __Y, __P,
11589 (__mmask8) -1);
11592 extern __inline __mmask8
11593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11596 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11597 (__v8sf) __Y, __P,
11598 (__mmask8) -1);
11601 extern __inline __mmask8
11602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11603 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11604 const int __P)
11606 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11607 (__v4di) __Y, __P,
11608 (__mmask8) __U);
11611 extern __inline __mmask8
11612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11613 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11614 const int __P)
11616 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11617 (__v8si) __Y, __P,
11618 (__mmask8) __U);
11621 extern __inline __mmask8
11622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11624 const int __P)
11626 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11627 (__v4di) __Y, __P,
11628 (__mmask8) __U);
11631 extern __inline __mmask8
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11634 const int __P)
11636 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11637 (__v8si) __Y, __P,
11638 (__mmask8) __U);
11641 extern __inline __mmask8
11642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11644 const int __P)
11646 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11647 (__v4df) __Y, __P,
11648 (__mmask8) __U);
11651 extern __inline __mmask8
11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11654 const int __P)
11656 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11657 (__v8sf) __Y, __P,
11658 (__mmask8) __U);
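/* Usage sketch (illustrative, not part of this header): __P in the
   integer compare intrinsics above is the _MM_CMPINT_* predicate
   (0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = not-LT i.e. GE, 6 = not-LE
   i.e. GT), while the pd/ps forms take the wider _CMP_* predicate set
   also used by _mm256_cmp_ps/_mm256_cmp_pd.  Hypothetical helper
   returning a bit mask of the lanes of __X that are signed
   less-than the matching lanes of __Y.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_lanes_lt_epi32 (__m256i __X, __m256i __Y)
{
  /* Predicate 1 selects signed "less than".  */
  return _mm256_cmp_epi32_mask (__X, __Y, 1);
}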
11661 extern __inline __mmask8
11662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11663 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11665 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11666 (__v2di) __Y, __P,
11667 (__mmask8) -1);
11670 extern __inline __mmask8
11671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11674 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11675 (__v4si) __Y, __P,
11676 (__mmask8) -1);
11679 extern __inline __mmask8
11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11683 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11684 (__v2di) __Y, __P,
11685 (__mmask8) -1);
11688 extern __inline __mmask8
11689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11690 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11692 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11693 (__v4si) __Y, __P,
11694 (__mmask8) -1);
11697 extern __inline __mmask8
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11701 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11702 (__v2df) __Y, __P,
11703 (__mmask8) -1);
11706 extern __inline __mmask8
11707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11710 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11711 (__v4sf) __Y, __P,
11712 (__mmask8) -1);
11715 extern __inline __mmask8
11716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11717 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11718 const int __P)
11720 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11721 (__v2di) __Y, __P,
11722 (__mmask8) __U);
11725 extern __inline __mmask8
11726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11728 const int __P)
11730 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11731 (__v4si) __Y, __P,
11732 (__mmask8) __U);
11735 extern __inline __mmask8
11736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11738 const int __P)
11740 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11741 (__v2di) __Y, __P,
11742 (__mmask8) __U);
11745 extern __inline __mmask8
11746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11747 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11748 const int __P)
11750 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11751 (__v4si) __Y, __P,
11752 (__mmask8) __U);
11755 extern __inline __mmask8
11756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11757 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11758 const int __P)
11760 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11761 (__v2df) __Y, __P,
11762 (__mmask8) __U);
11765 extern __inline __mmask8
11766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11767 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11768 const int __P)
11770 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11771 (__v4sf) __Y, __P,
11772 (__mmask8) __U);
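/* Usage sketch (illustrative, not part of this header): the masked
   compare forms AND their result with __U, so range tests can be
   chained.  Hypothetical helper reporting the lanes of __X that lie in
   the signed range [__Lo, __Hi].  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_in_range_epi32 (__m128i __X, __m128i __Lo, __m128i __Hi)
{
  /* Predicate 5 is "not less than", i.e. __X >= __Lo.  */
  __mmask8 __ge_lo = _mm_cmp_epi32_mask (__X, __Lo, 5);
  /* Only lanes already known to be >= __Lo are tested against __Hi
     (predicate 2 is "less than or equal").  */
  return _mm_mask_cmp_epi32_mask (__ge_lo, __X, __Hi, 2);
}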
11775 extern __inline __m256d
11776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11777 _mm256_permutex_pd (__m256d __X, const int __M)
11779 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11780 (__v4df)
11781 _mm256_undefined_pd (),
11782 (__mmask8) -1);
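/* Usage sketch (illustrative, not part of this header): each 2-bit
   field of __M picks the source lane for one destination double, so
   the immediate 0x1B (fields 3,2,1,0) reverses the vector.
   Hypothetical helper:  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_reverse_pd (__m256d __X)
{
  /* Destination lanes 0..3 take source lanes 3,2,1,0 respectively.  */
  return _mm256_permutex_pd (__X, 0x1B);
}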
11785 extern __inline __mmask8
11786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11789 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11790 (__v8si) __Y, 4,
11791 (__mmask8) __M);
11794 extern __inline __mmask8
11795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11796 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11798 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11799 (__v8si) __Y, 4,
11800 (__mmask8) - 1);
11803 extern __inline __mmask8
11804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11807 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11808 (__v8si) __Y, 1,
11809 (__mmask8) __M);
11812 extern __inline __mmask8
11813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11816 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11817 (__v8si) __Y, 1,
11818 (__mmask8) - 1);
11821 extern __inline __mmask8
11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11825 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11826 (__v8si) __Y, 5,
11827 (__mmask8) __M);
11830 extern __inline __mmask8
11831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11832 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11834 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11835 (__v8si) __Y, 5,
11836 (__mmask8) - 1);
11839 extern __inline __mmask8
11840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11841 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11843 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11844 (__v8si) __Y, 2,
11845 (__mmask8) __M);
11848 extern __inline __mmask8
11849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11852 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11853 (__v8si) __Y, 2,
11854 (__mmask8) - 1);
11857 extern __inline __mmask8
11858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11861 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11862 (__v4di) __Y, 4,
11863 (__mmask8) __M);
11866 extern __inline __mmask8
11867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11870 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11871 (__v4di) __Y, 4,
11872 (__mmask8) - 1);
11875 extern __inline __mmask8
11876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11877 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11879 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11880 (__v4di) __Y, 1,
11881 (__mmask8) __M);
11884 extern __inline __mmask8
11885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11888 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11889 (__v4di) __Y, 1,
11890 (__mmask8) - 1);
11893 extern __inline __mmask8
11894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11895 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11897 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11898 (__v4di) __Y, 5,
11899 (__mmask8) __M);
11902 extern __inline __mmask8
11903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11906 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11907 (__v4di) __Y, 5,
11908 (__mmask8) - 1);
11911 extern __inline __mmask8
11912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11915 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11916 (__v4di) __Y, 2,
11917 (__mmask8) __M);
11920 extern __inline __mmask8
11921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11922 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11924 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11925 (__v4di) __Y, 2,
11926 (__mmask8) - 1);
11929 extern __inline __mmask8
11930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11931 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11933 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11934 (__v8si) __Y, 4,
11935 (__mmask8) __M);
11938 extern __inline __mmask8
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11942 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11943 (__v8si) __Y, 4,
11944 (__mmask8) - 1);
11947 extern __inline __mmask8
11948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11951 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11952 (__v8si) __Y, 1,
11953 (__mmask8) __M);
11956 extern __inline __mmask8
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11960 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11961 (__v8si) __Y, 1,
11962 (__mmask8) - 1);
11965 extern __inline __mmask8
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11969 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11970 (__v8si) __Y, 5,
11971 (__mmask8) __M);
11974 extern __inline __mmask8
11975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
11978 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11979 (__v8si) __Y, 5,
11980 (__mmask8) - 1);
11983 extern __inline __mmask8
11984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11985 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11987 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11988 (__v8si) __Y, 2,
11989 (__mmask8) __M);
11992 extern __inline __mmask8
11993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11994 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
11996 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11997 (__v8si) __Y, 2,
11998 (__mmask8) - 1);
12001 extern __inline __mmask8
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12005 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12006 (__v4di) __Y, 4,
12007 (__mmask8) __M);
12010 extern __inline __mmask8
12011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12012 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
12014 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12015 (__v4di) __Y, 4,
12016 (__mmask8) - 1);
12019 extern __inline __mmask8
12020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12023 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12024 (__v4di) __Y, 1,
12025 (__mmask8) __M);
12028 extern __inline __mmask8
12029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12030 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
12032 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12033 (__v4di) __Y, 1,
12034 (__mmask8) - 1);
12037 extern __inline __mmask8
12038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12039 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12041 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12042 (__v4di) __Y, 5,
12043 (__mmask8) __M);
12046 extern __inline __mmask8
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
12050 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12051 (__v4di) __Y, 5,
12052 (__mmask8) - 1);
12055 extern __inline __mmask8
12056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12057 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12059 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12060 (__v4di) __Y, 2,
12061 (__mmask8) __M);
12064 extern __inline __mmask8
12065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
12068 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12069 (__v4di) __Y, 2,
12070 (__mmask8) - 1);
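/* Usage sketch (illustrative, not part of this header): the named
   comparisons above are thin wrappers that pass a fixed predicate
   (4 = NE, 1 = LT, 5 = GE, 2 = LE) to the same builtins as
   _mm256_cmp_epi32_mask and friends.  Hypothetical helper counting how
   many 32-bit lanes of two vectors differ.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_count_diff_epi32 (__m256i __X, __m256i __Y)
{
  /* Each set bit in the compare mask marks a differing lane.  */
  return __builtin_popcount ((unsigned int)
			     _mm256_cmpneq_epi32_mask (__X, __Y));
}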
12073 extern __inline __mmask8
12074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12075 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12077 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12078 (__v4si) __Y, 4,
12079 (__mmask8) __M);
12082 extern __inline __mmask8
12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
12086 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12087 (__v4si) __Y, 4,
12088 (__mmask8) - 1);
12091 extern __inline __mmask8
12092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12093 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12095 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12096 (__v4si) __Y, 1,
12097 (__mmask8) __M);
12100 extern __inline __mmask8
12101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
12104 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12105 (__v4si) __Y, 1,
12106 (__mmask8) - 1);
12109 extern __inline __mmask8
12110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12111 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12113 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12114 (__v4si) __Y, 5,
12115 (__mmask8) __M);
12118 extern __inline __mmask8
12119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12120 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
12122 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12123 (__v4si) __Y, 5,
12124 (__mmask8) - 1);
12127 extern __inline __mmask8
12128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12129 _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12131 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12132 (__v4si) __Y, 2,
12133 (__mmask8) __M);
12136 extern __inline __mmask8
12137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12138 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
12140 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12141 (__v4si) __Y, 2,
12142 (__mmask8) - 1);
12145 extern __inline __mmask8
12146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12147 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12149 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12150 (__v2di) __Y, 4,
12151 (__mmask8) __M);
12154 extern __inline __mmask8
12155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
12158 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12159 (__v2di) __Y, 4,
12160 (__mmask8) - 1);
12163 extern __inline __mmask8
12164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12165 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12167 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12168 (__v2di) __Y, 1,
12169 (__mmask8) __M);
12172 extern __inline __mmask8
12173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12174 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
12176 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12177 (__v2di) __Y, 1,
12178 (__mmask8) - 1);
12181 extern __inline __mmask8
12182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12183 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12185 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12186 (__v2di) __Y, 5,
12187 (__mmask8) __M);
12190 extern __inline __mmask8
12191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12192 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
12194 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12195 (__v2di) __Y, 5,
12196 (__mmask8) - 1);
12199 extern __inline __mmask8
12200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12203 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12204 (__v2di) __Y, 2,
12205 (__mmask8) __M);
12208 extern __inline __mmask8
12209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12210 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
12212 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12213 (__v2di) __Y, 2,
12214 (__mmask8) - 1);
12217 extern __inline __mmask8
12218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12219 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12221 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12222 (__v4si) __Y, 4,
12223 (__mmask8) __M);
12226 extern __inline __mmask8
12227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12228 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
12230 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12231 (__v4si) __Y, 4,
12232 (__mmask8) - 1);
12235 extern __inline __mmask8
12236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12237 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12239 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12240 (__v4si) __Y, 1,
12241 (__mmask8) __M);
12244 extern __inline __mmask8
12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
12248 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12249 (__v4si) __Y, 1,
12250 (__mmask8) - 1);
12253 extern __inline __mmask8
12254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12255 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12257 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12258 (__v4si) __Y, 5,
12259 (__mmask8) __M);
12262 extern __inline __mmask8
12263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12264 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
12266 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12267 (__v4si) __Y, 5,
12268 (__mmask8) - 1);
12271 extern __inline __mmask8
12272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12273 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12275 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12276 (__v4si) __Y, 2,
12277 (__mmask8) __M);
12280 extern __inline __mmask8
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
12284 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12285 (__v4si) __Y, 2,
12286 (__mmask8) - 1);
12289 extern __inline __mmask8
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12293 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12294 (__v2di) __Y, 4,
12295 (__mmask8) __M);
12298 extern __inline __mmask8
12299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
12302 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12303 (__v2di) __Y, 4,
12304 (__mmask8) - 1);
12307 extern __inline __mmask8
12308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12309 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12311 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12312 (__v2di) __Y, 1,
12313 (__mmask8) __M);
12316 extern __inline __mmask8
12317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
12320 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12321 (__v2di) __Y, 1,
12322 (__mmask8) - 1);
12325 extern __inline __mmask8
12326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12329 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12330 (__v2di) __Y, 5,
12331 (__mmask8) __M);
12334 extern __inline __mmask8
12335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12336 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
12338 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12339 (__v2di) __Y, 5,
12340 (__mmask8) - 1);
12343 extern __inline __mmask8
12344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12347 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12348 (__v2di) __Y, 2,
12349 (__mmask8) __M);
12352 extern __inline __mmask8
12353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12354 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
12356 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12357 (__v2di) __Y, 2,
12358 (__mmask8) - 1);
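/* Usage sketch (illustrative, not part of this header): compare masks
   feed straight into the blend intrinsics defined earlier.
   Hypothetical helper clamping every lane of __X to at most the
   matching lane of __Hi.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_clamp_hi_epi32 (__m128i __X, __m128i __Hi)
{
  /* Predicate 6 is "not less-or-equal", i.e. signed greater-than.  */
  __mmask8 __gt = _mm_cmp_epi32_mask (__X, __Hi, 6);
  /* Lanes flagged in __gt take the value from __Hi, the rest keep __X.  */
  return _mm_mask_blend_epi32 (__gt, __X, __Hi);
}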
12361 #else
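/* The definitions that follow are the fallback forms of the immediate
   intrinsics above, used when the always-inline wrappers cannot
   guarantee that their "const int" arguments reach the builtins as
   integer constant expressions (typically unoptimized builds); the
   macro forms splice the immediate in textually instead.  */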
12362 #define _mm256_permutex_pd(X, M) \
12363 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
12364 (__v4df)(__m256d)_mm256_undefined_pd(),\
12365 (__mmask8)-1))
12367 #define _mm256_maskz_permutex_epi64(M, X, I) \
12368 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12369 (int)(I), \
12370 (__v4di)(__m256i) \
12371 (_mm256_setzero_si256()),\
12372 (__mmask8)(M)))
12374 #define _mm256_mask_permutex_epi64(W, M, X, I) \
12375 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12376 (int)(I), \
12377 (__v4di)(__m256i)(W), \
12378 (__mmask8)(M)))
12380 #define _mm256_insertf32x4(X, Y, C) \
12381 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12382 (__v4sf)(__m128) (Y), (int) (C), \
12383 (__v8sf)(__m256)_mm256_setzero_ps(), \
12384 (__mmask8)-1))
12386 #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
12387 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12388 (__v4sf)(__m128) (Y), (int) (C), \
12389 (__v8sf)(__m256)(W), \
12390 (__mmask8)(U)))
12392 #define _mm256_maskz_insertf32x4(U, X, Y, C) \
12393 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12394 (__v4sf)(__m128) (Y), (int) (C), \
12395 (__v8sf)(__m256)_mm256_setzero_ps(), \
12396 (__mmask8)(U)))
12398 #define _mm256_inserti32x4(X, Y, C) \
12399 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12400 (__v4si)(__m128i) (Y), (int) (C), \
12401 (__v8si)(__m256i)_mm256_setzero_si256(), \
12402 (__mmask8)-1))
12404 #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
12405 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12406 (__v4si)(__m128i) (Y), (int) (C), \
12407 (__v8si)(__m256i)(W), \
12408 (__mmask8)(U)))
12410 #define _mm256_maskz_inserti32x4(U, X, Y, C) \
12411 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12412 (__v4si)(__m128i) (Y), (int) (C), \
12413 (__v8si)(__m256i)_mm256_setzero_si256(), \
12414 (__mmask8)(U)))
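/* Usage sketch (illustrative, not part of this header): the
   insert[fi]32x4 forms replace one 128-bit half of a 256-bit vector,
   selected by the immediate (0 = low half, 1 = high half).
   Hypothetical helper overwriting the upper half of __A with __B.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_set_high_i128 (__m256i __A, __m128i __B)
{
  return _mm256_inserti32x4 (__A, __B, 1);
}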
12416 #define _mm256_extractf32x4_ps(X, C) \
12417 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12418 (int) (C), \
12419 (__v4sf)(__m128)_mm_setzero_ps(), \
12420 (__mmask8)-1))
12422 #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
12423 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12424 (int) (C), \
12425 (__v4sf)(__m128)(W), \
12426 (__mmask8)(U)))
12428 #define _mm256_maskz_extractf32x4_ps(U, X, C) \
12429 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12430 (int) (C), \
12431 (__v4sf)(__m128)_mm_setzero_ps(), \
12432 (__mmask8)(U)))
12434 #define _mm256_extracti32x4_epi32(X, C) \
12435 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12436 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
12438 #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
12439 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12440 (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12442 #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
12443 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12444 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
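/* Usage sketch (illustrative, not part of this header): the
   extract[fi]32x4 forms are the converse of the inserts above and
   return one 128-bit half of a 256-bit vector.  Hypothetical helper
   fetching the upper half.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_get_high_i128 (__m256i __A)
{
  return _mm256_extracti32x4_epi32 (__A, 1);
}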
12446 #define _mm256_shuffle_i64x2(X, Y, C) \
12447 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12448 (__v4di)(__m256i)(Y), (int)(C), \
12449 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12450 (__mmask8)-1))
12452 #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
12453 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12454 (__v4di)(__m256i)(Y), (int)(C), \
12455 (__v4di)(__m256i)(W),\
12456 (__mmask8)(U)))
12458 #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
12459 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12460 (__v4di)(__m256i)(Y), (int)(C), \
12461 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12462 (__mmask8)(U)))
12464 #define _mm256_shuffle_i32x4(X, Y, C) \
12465 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12466 (__v8si)(__m256i)(Y), (int)(C), \
12467 (__v8si)(__m256i)_mm256_setzero_si256(), \
12468 (__mmask8)-1))
12470 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
12471 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12472 (__v8si)(__m256i)(Y), (int)(C), \
12473 (__v8si)(__m256i)(W), \
12474 (__mmask8)(U)))
12476 #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
12477 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12478 (__v8si)(__m256i)(Y), (int)(C), \
12479 (__v8si)(__m256i)_mm256_setzero_si256(), \
12480 (__mmask8)(U)))
12482 #define _mm256_shuffle_f64x2(X, Y, C) \
12483 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12484 (__v4df)(__m256d)(Y), (int)(C), \
12485 (__v4df)(__m256d)_mm256_setzero_pd(), \
12486 (__mmask8)-1))
12488 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
12489 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12490 (__v4df)(__m256d)(Y), (int)(C), \
12491 (__v4df)(__m256d)(W), \
12492 (__mmask8)(U)))
12494 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
12495 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12496 (__v4df)(__m256d)(Y), (int)(C), \
12497 (__v4df)(__m256d)_mm256_setzero_pd(), \
12498 (__mmask8)(U)))
12500 #define _mm256_shuffle_f32x4(X, Y, C) \
12501 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12502 (__v8sf)(__m256)(Y), (int)(C), \
12503 (__v8sf)(__m256)_mm256_setzero_ps(), \
12504 (__mmask8)-1))
12506 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
12507 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12508 (__v8sf)(__m256)(Y), (int)(C), \
12509 (__v8sf)(__m256)(W), \
12510 (__mmask8)(U)))
12512 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
12513 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12514 (__v8sf)(__m256)(Y), (int)(C), \
12515 (__v8sf)(__m256)_mm256_setzero_ps(), \
12516 (__mmask8)(U)))
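/* Usage sketch (illustrative, not part of this header): for the
   256-bit shuffle_[if]*x* forms, bit 0 of the immediate picks the
   128-bit half of the first operand that becomes the low half of the
   result, and bit 1 picks the half of the second operand that becomes
   the high half.  Hypothetical helper packing the two upper halves
   together.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_pack_high_halves (__m256i __X, __m256i __Y)
{
  return _mm256_shuffle_i64x2 (__X, __Y, 0x3);
}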
12518 #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
12519 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12520 (__v4df)(__m256d)(B), (int)(C), \
12521 (__v4df)(__m256d)(W), \
12522 (__mmask8)(U)))
12524 #define _mm256_maskz_shuffle_pd(U, A, B, C) \
12525 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12526 (__v4df)(__m256d)(B), (int)(C), \
12527 (__v4df)(__m256d)_mm256_setzero_pd(),\
12528 (__mmask8)(U)))
12530 #define _mm_mask_shuffle_pd(W, U, A, B, C) \
12531 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12532 (__v2df)(__m128d)(B), (int)(C), \
12533 (__v2df)(__m128d)(W), \
12534 (__mmask8)(U)))
12536 #define _mm_maskz_shuffle_pd(U, A, B, C) \
12537 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12538 (__v2df)(__m128d)(B), (int)(C), \
12539 (__v2df)(__m128d)_mm_setzero_pd(), \
12540 (__mmask8)(U)))
12542 #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
12543 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12544 (__v8sf)(__m256)(B), (int)(C), \
12545 (__v8sf)(__m256)(W), \
12546 (__mmask8)(U)))
12548 #define _mm256_maskz_shuffle_ps(U, A, B, C) \
12549 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12550 (__v8sf)(__m256)(B), (int)(C), \
12551 (__v8sf)(__m256)_mm256_setzero_ps(), \
12552 (__mmask8)(U)))
12554 #define _mm_mask_shuffle_ps(W, U, A, B, C) \
12555 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12556 (__v4sf)(__m128)(B), (int)(C), \
12557 (__v4sf)(__m128)(W), \
12558 (__mmask8)(U)))
12560 #define _mm_maskz_shuffle_ps(U, A, B, C) \
12561 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12562 (__v4sf)(__m128)(B), (int)(C), \
12563 (__v4sf)(__m128)_mm_setzero_ps(), \
12564 (__mmask8)(U)))
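/* Usage sketch (illustrative, not part of this header): these are the
   masked forms of the classic shufps/shufpd selection, with the same
   immediate encoding.  Hypothetical helper broadcasting lane 0 of __A
   into the lanes selected by __U and zeroing the rest.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_maskz_bcast_lane0_ps (__mmask8 __U, __m128 __A)
{
  return _mm_maskz_shuffle_ps (__U, __A, __A, _MM_SHUFFLE (0, 0, 0, 0));
}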
12566 #define _mm256_fixupimm_pd(X, Y, Z, C) \
12567 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12568 (__v4df)(__m256d)(Y), \
12569 (__v4di)(__m256i)(Z), (int)(C), \
12570 (__mmask8)(-1)))
12572 #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
12573 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12574 (__v4df)(__m256d)(Y), \
12575 (__v4di)(__m256i)(Z), (int)(C), \
12576 (__mmask8)(U)))
12578 #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
12579 ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
12580 (__v4df)(__m256d)(Y), \
12581 (__v4di)(__m256i)(Z), (int)(C),\
12582 (__mmask8)(U)))
12584 #define _mm256_fixupimm_ps(X, Y, Z, C) \
12585 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12586 (__v8sf)(__m256)(Y), \
12587 (__v8si)(__m256i)(Z), (int)(C), \
12588 (__mmask8)(-1)))
12591 #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
12592 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12593 (__v8sf)(__m256)(Y), \
12594 (__v8si)(__m256i)(Z), (int)(C), \
12595 (__mmask8)(U)))
12597 #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
12598 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
12599 (__v8sf)(__m256)(Y), \
12600 (__v8si)(__m256i)(Z), (int)(C),\
12601 (__mmask8)(U)))
12603 #define _mm_fixupimm_pd(X, Y, Z, C) \
12604 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12605 (__v2df)(__m128d)(Y), \
12606 (__v2di)(__m128i)(Z), (int)(C), \
12607 (__mmask8)(-1)))
12610 #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
12611 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12612 (__v2df)(__m128d)(Y), \
12613 (__v2di)(__m128i)(Z), (int)(C), \
12614 (__mmask8)(U)))
12616 #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
12617 ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
12618 (__v2df)(__m128d)(Y), \
12619 (__v2di)(__m128i)(Z), (int)(C),\
12620 (__mmask8)(U)))
12622 #define _mm_fixupimm_ps(X, Y, Z, C) \
12623 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12624 (__v4sf)(__m128)(Y), \
12625 (__v4si)(__m128i)(Z), (int)(C), \
12626 (__mmask8)(-1)))
12628 #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
12629 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12630 (__v4sf)(__m128)(Y), \
12631 (__v4si)(__m128i)(Z), (int)(C),\
12632 (__mmask8)(U)))
12634 #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
12635 ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
12636 (__v4sf)(__m128)(Y), \
12637 (__v4si)(__m128i)(Z), (int)(C),\
12638 (__mmask8)(U)))
12640 #define _mm256_mask_srli_epi32(W, U, A, B) \
12641 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12642 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
12644 #define _mm256_maskz_srli_epi32(U, A, B) \
12645 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12646 (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
12648 #define _mm_mask_srli_epi32(W, U, A, B) \
12649 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12650 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12652 #define _mm_maskz_srli_epi32(U, A, B) \
12653 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12654 (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
12656 #define _mm256_mask_srli_epi64(W, U, A, B) \
12657 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12658 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
12660 #define _mm256_maskz_srli_epi64(U, A, B) \
12661 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12662 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
12664 #define _mm_mask_srli_epi64(W, U, A, B) \
12665 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12666 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
12668 #define _mm_maskz_srli_epi64(U, A, B) \
12669 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12670 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
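/* Usage sketch (illustrative, not part of this header): the masked
   immediate shifts keep (mask form) or zero (maskz form) the lanes
   whose mask bit is clear.  Hypothetical helper halving only the
   unsigned lanes selected by __U and zeroing the others.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_maskz_halve_epu32 (__mmask8 __U, __m256i __A)
{
  /* Logical right shift by one halves each selected unsigned lane.  */
  return _mm256_maskz_srli_epi32 (__U, __A, 1);
}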
12672 #define _mm256_mask_slli_epi32(W, U, X, C) \
12673 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12674 (__v8si)(__m256i)(W),\
12675 (__mmask8)(U)))
12677 #define _mm256_maskz_slli_epi32(U, X, C) \
12678 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12679 (__v8si)(__m256i)_mm256_setzero_si256(),\
12680 (__mmask8)(U)))
12682 #define _mm256_mask_slli_epi64(W, U, X, C) \
12683 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12684 (__v4di)(__m256i)(W),\
12685 (__mmask8)(U)))
12687 #define _mm256_maskz_slli_epi64(U, X, C) \
12688 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12689 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12690 (__mmask8)(U)))
12692 #define _mm_mask_slli_epi32(W, U, X, C) \
12693 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12694 (__v4si)(__m128i)(W),\
12695 (__mmask8)(U)))
12697 #define _mm_maskz_slli_epi32(U, X, C) \
12698 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12699 (__v4si)(__m128i)_mm_setzero_si128 (),\
12700 (__mmask8)(U)))
12702 #define _mm_mask_slli_epi64(W, U, X, C) \
12703 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12704 (__v2di)(__m128i)(W),\
12705 (__mmask8)(U)))
12707 #define _mm_maskz_slli_epi64(U, X, C) \
12708 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12709 (__v2di)(__m128i)_mm_setzero_di(),\
12710 (__mmask8)(U)))
12712 #define _mm256_ternarylogic_epi64(A, B, C, I) \
12713 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12714 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
12716 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
12717 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12718 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12720 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
12721 ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
12722 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12724 #define _mm256_ternarylogic_epi32(A, B, C, I) \
12725 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12726 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
12728 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
12729 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12730 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12732 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
12733 ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
12734 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12736 #define _mm_ternarylogic_epi64(A, B, C, I) \
12737 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12738 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
12740 #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
12741 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12742 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12744 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
12745 ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
12746 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12748 #define _mm_ternarylogic_epi32(A, B, C, I) \
12749 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12750 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
12752 #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
12753 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12754 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
12756 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
12757 ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
12758 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
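/* Usage sketch (illustrative, not part of this header): the
   ternarylogic immediate is the 8-entry truth table of the boolean
   function applied bitwise to (A, B, C); 0x96, for instance, is the
   table of A ^ B ^ C.  Hypothetical three-way XOR helper.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_xor3_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
  return _mm256_ternarylogic_epi64 (__A, __B, __C, 0x96);
}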
12760 #define _mm256_roundscale_ps(A, B) \
12761 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12762 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
12764 #define _mm256_mask_roundscale_ps(W, U, A, B) \
12765 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12766 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
12768 #define _mm256_maskz_roundscale_ps(U, A, B) \
12769 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12770 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
12772 #define _mm256_roundscale_pd(A, B) \
12773 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12774 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
12776 #define _mm256_mask_roundscale_pd(W, U, A, B) \
12777 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12778 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
12780 #define _mm256_maskz_roundscale_pd(U, A, B) \
12781 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12782 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
12784 #define _mm_roundscale_ps(A, B) \
12785 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12786 (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
12788 #define _mm_mask_roundscale_ps(W, U, A, B) \
12789 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12790 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
12792 #define _mm_maskz_roundscale_ps(U, A, B) \
12793 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12794 (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
12796 #define _mm_roundscale_pd(A, B) \
12797 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12798 (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
12800 #define _mm_mask_roundscale_pd(W, U, A, B) \
12801 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12802 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
12804 #define _mm_maskz_roundscale_pd(U, A, B) \
12805 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12806 (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
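/* Usage sketch (illustrative, not part of this header): for roundscale
   the two low bits of the immediate select the rounding mode
   (0 nearest-even, 1 toward -inf, 2 toward +inf, 3 toward zero) and
   the high nibble the number of fraction bits to keep.  Hypothetical
   helper computing floor() of each lane.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_floor_ps (__m256 __A)
{
  return _mm256_roundscale_ps (__A, 0x01);
}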
12808 #define _mm256_getmant_ps(X, B, C) \
12809 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12810 (int)(((C)<<2) | (B)), \
12811 (__v8sf)(__m256)_mm256_setzero_ps(), \
12812 (__mmask8)-1))
12814 #define _mm256_mask_getmant_ps(W, U, X, B, C) \
12815 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12816 (int)(((C)<<2) | (B)), \
12817 (__v8sf)(__m256)(W), \
12818 (__mmask8)(U)))
12820 #define _mm256_maskz_getmant_ps(U, X, B, C) \
12821 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12822 (int)(((C)<<2) | (B)), \
12823 (__v8sf)(__m256)_mm256_setzero_ps(), \
12824 (__mmask8)(U)))
12826 #define _mm_getmant_ps(X, B, C) \
12827 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12828 (int)(((C)<<2) | (B)), \
12829 (__v4sf)(__m128)_mm_setzero_ps(), \
12830 (__mmask8)-1))
12832 #define _mm_mask_getmant_ps(W, U, X, B, C) \
12833 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12834 (int)(((C)<<2) | (B)), \
12835 (__v4sf)(__m128)(W), \
12836 (__mmask8)(U)))
12838 #define _mm_maskz_getmant_ps(U, X, B, C) \
12839 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12840 (int)(((C)<<2) | (B)), \
12841 (__v4sf)(__m128)_mm_setzero_ps(), \
12842 (__mmask8)(U)))
12844 #define _mm256_getmant_pd(X, B, C) \
12845 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12846 (int)(((C)<<2) | (B)), \
12847 (__v4df)(__m256d)_mm256_setzero_pd(), \
12848 (__mmask8)-1))
12850 #define _mm256_mask_getmant_pd(W, U, X, B, C) \
12851 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12852 (int)(((C)<<2) | (B)), \
12853 (__v4df)(__m256d)(W), \
12854 (__mmask8)(U)))
12856 #define _mm256_maskz_getmant_pd(U, X, B, C) \
12857 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12858 (int)(((C)<<2) | (B)), \
12859 (__v4df)(__m256d)_mm256_setzero_pd(), \
12860 (__mmask8)(U)))
12862 #define _mm_getmant_pd(X, B, C) \
12863 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12864 (int)(((C)<<2) | (B)), \
12865 (__v2df)(__m128d)_mm_setzero_pd(), \
12866 (__mmask8)-1))
12868 #define _mm_mask_getmant_pd(W, U, X, B, C) \
12869 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12870 (int)(((C)<<2) | (B)), \
12871 (__v2df)(__m128d)(W), \
12872 (__mmask8)(U)))
12874 #define _mm_maskz_getmant_pd(U, X, B, C) \
12875 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12876 (int)(((C)<<2) | (B)), \
12877 (__v2df)(__m128d)_mm_setzero_pd(), \
12878 (__mmask8)(U)))
12880 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12881 (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD, \
12882 (float const *)ADDR, \
12883 (__v8si)(__m256i)INDEX, \
12884 (__mmask8)MASK, (int)SCALE)
12886 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12887 (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD, \
12888 (float const *)ADDR, \
12889 (__v4si)(__m128i)INDEX, \
12890 (__mmask8)MASK, (int)SCALE)
12892 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12893 (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD, \
12894 (double const *)ADDR, \
12895 (__v4si)(__m128i)INDEX, \
12896 (__mmask8)MASK, (int)SCALE)
12898 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12899 (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD, \
12900 (double const *)ADDR, \
12901 (__v4si)(__m128i)INDEX, \
12902 (__mmask8)MASK, (int)SCALE)
12904 #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12905 (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD, \
12906 (float const *)ADDR, \
12907 (__v4di)(__m256i)INDEX, \
12908 (__mmask8)MASK, (int)SCALE)
12910 #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12911 (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD, \
12912 (float const *)ADDR, \
12913 (__v2di)(__m128i)INDEX, \
12914 (__mmask8)MASK, (int)SCALE)
12916 #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12917 (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD, \
12918 (double const *)ADDR, \
12919 (__v4di)(__m256i)INDEX, \
12920 (__mmask8)MASK, (int)SCALE)
12922 #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12923 (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD, \
12924 (double const *)ADDR, \
12925 (__v2di)(__m128i)INDEX, \
12926 (__mmask8)MASK, (int)SCALE)
12928 #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12929 (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD, \
12930 (int const *)ADDR, \
12931 (__v8si)(__m256i)INDEX, \
12932 (__mmask8)MASK, (int)SCALE)
12934 #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12935 (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD, \
12936 (int const *)ADDR, \
12937 (__v4si)(__m128i)INDEX, \
12938 (__mmask8)MASK, (int)SCALE)
12940 #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12941 (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD, \
12942 (long long const *)ADDR, \
12943 (__v4si)(__m128i)INDEX, \
12944 (__mmask8)MASK, (int)SCALE)
12946 #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12947 (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD, \
12948 (long long const *)ADDR, \
12949 (__v4si)(__m128i)INDEX, \
12950 (__mmask8)MASK, (int)SCALE)
12952 #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12953 (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD, \
12954 (int const *)ADDR, \
12955 (__v4di)(__m256i)INDEX, \
12956 (__mmask8)MASK, (int)SCALE)
12958 #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12959 (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD, \
12960 (int const *)ADDR, \
12961 (__v2di)(__m128i)INDEX, \
12962 (__mmask8)MASK, (int)SCALE)
12964 #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12965 (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD, \
12966 (long long const *)ADDR, \
12967 (__v4di)(__m256i)INDEX, \
12968 (__mmask8)MASK, (int)SCALE)
12970 #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12971 (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD, \
12972 (long long const *)ADDR, \
12973 (__v2di)(__m128i)INDEX, \
12974 (__mmask8)MASK, (int)SCALE)
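/* Usage sketch (illustrative, not part of this header): the masked
   gathers above load element i from ADDR + INDEX[i] * SCALE for every
   set bit of MASK and keep the corresponding lane of V1OLD otherwise.
   Hypothetical helper loading eight table entries selected by __Idx.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_gather_table_ps (float const *__Table, __m256i __Idx)
{
  /* All mask bits set, so every lane is fetched from the table;
     scale 4 matches sizeof (float).  */
  return _mm256_mmask_i32gather_ps (_mm256_setzero_ps (), 0xFF,
				    __Idx, __Table, 4);
}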
12976 #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
12977 __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)0xFF, \
12978 (__v8si)(__m256i)INDEX, \
12979 (__v8sf)(__m256)V1, (int)SCALE)
12981 #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12982 __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)MASK, \
12983 (__v8si)(__m256i)INDEX, \
12984 (__v8sf)(__m256)V1, (int)SCALE)
12986 #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
12987 __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)0xFF, \
12988 (__v4si)(__m128i)INDEX, \
12989 (__v4sf)(__m128)V1, (int)SCALE)
12991 #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12992 __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)MASK, \
12993 (__v4si)(__m128i)INDEX, \
12994 (__v4sf)(__m128)V1, (int)SCALE)
12996 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
12997 __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)0xFF, \
12998 (__v4si)(__m128i)INDEX, \
12999 (__v4df)(__m256d)V1, (int)SCALE)
13001 #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13002 __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)MASK, \
13003 (__v4si)(__m128i)INDEX, \
13004 (__v4df)(__m256d)V1, (int)SCALE)
13006 #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
13007 __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)0xFF, \
13008 (__v4si)(__m128i)INDEX, \
13009 (__v2df)(__m128d)V1, (int)SCALE)
13011 #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13012 __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)MASK, \
13013 (__v4si)(__m128i)INDEX, \
13014 (__v2df)(__m128d)V1, (int)SCALE)
13016 #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
13017 __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)0xFF, \
13018 (__v4di)(__m256i)INDEX, \
13019 (__v4sf)(__m128)V1, (int)SCALE)
13021 #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13022 __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)MASK, \
13023 (__v4di)(__m256i)INDEX, \
13024 (__v4sf)(__m128)V1, (int)SCALE)
13026 #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
13027 __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)0xFF, \
13028 (__v2di)(__m128i)INDEX, \
13029 (__v4sf)(__m128)V1, (int)SCALE)
13031 #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13032 __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)MASK, \
13033 (__v2di)(__m128i)INDEX, \
13034 (__v4sf)(__m128)V1, (int)SCALE)
13036 #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
13037 __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)0xFF, \
13038 (__v4di)(__m256i)INDEX, \
13039 (__v4df)(__m256d)V1, (int)SCALE)
13041 #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13042 __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)MASK, \
13043 (__v4di)(__m256i)INDEX, \
13044 (__v4df)(__m256d)V1, (int)SCALE)
13046 #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
13047 __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)0xFF, \
13048 (__v2di)(__m128i)INDEX, \
13049 (__v2df)(__m128d)V1, (int)SCALE)
13051 #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13052 __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)MASK, \
13053 (__v2di)(__m128i)INDEX, \
13054 (__v2df)(__m128d)V1, (int)SCALE)
13056 #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13057 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)0xFF, \
13058 (__v8si)(__m256i)INDEX, \
13059 (__v8si)(__m256i)V1, (int)SCALE)
13061 #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13062 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)MASK, \
13063 (__v8si)(__m256i)INDEX, \
13064 (__v8si)(__m256i)V1, (int)SCALE)
13066 #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13067 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)0xFF, \
13068 (__v4si)(__m128i)INDEX, \
13069 (__v4si)(__m128i)V1, (int)SCALE)
13071 #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13072 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)MASK, \
13073 (__v4si)(__m128i)INDEX, \
13074 (__v4si)(__m128i)V1, (int)SCALE)
13076 #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
13077 __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)0xFF, \
13078 (__v4si)(__m128i)INDEX, \
13079 (__v4di)(__m256i)V1, (int)SCALE)
13081 #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13082 __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)MASK, \
13083 (__v4si)(__m128i)INDEX, \
13084 (__v4di)(__m256i)V1, (int)SCALE)
13086 #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
13087 __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)0xFF, \
13088 (__v4si)(__m128i)INDEX, \
13089 (__v2di)(__m128i)V1, (int)SCALE)
13091 #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13092 __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)MASK, \
13093 (__v4si)(__m128i)INDEX, \
13094 (__v2di)(__m128i)V1, (int)SCALE)
13096 #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
13097 __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)0xFF, \
13098 (__v4di)(__m256i)INDEX, \
13099 (__v4si)(__m128i)V1, (int)SCALE)
13101 #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13102 __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)MASK, \
13103 (__v4di)(__m256i)INDEX, \
13104 (__v4si)(__m128i)V1, (int)SCALE)
13106 #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
13107 __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)0xFF, \
13108 (__v2di)(__m128i)INDEX, \
13109 (__v4si)(__m128i)V1, (int)SCALE)
13111 #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13112 __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)MASK, \
13113 (__v2di)(__m128i)INDEX, \
13114 (__v4si)(__m128i)V1, (int)SCALE)
13116 #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
13117 __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)0xFF, \
13118 (__v4di)(__m256i)INDEX, \
13119 (__v4di)(__m256i)V1, (int)SCALE)
13121 #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13122 __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)MASK, \
13123 (__v4di)(__m256i)INDEX, \
13124 (__v4di)(__m256i)V1, (int)SCALE)
13126 #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
13127 __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)0xFF, \
13128 (__v2di)(__m128i)INDEX, \
13129 (__v2di)(__m128i)V1, (int)SCALE)
13131 #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13132 __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)MASK, \
13133 (__v2di)(__m128i)INDEX, \
13134 (__v2di)(__m128i)V1, (int)SCALE)
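/* Usage sketch (illustrative, not part of this header): the scatters
   are the store-side counterpart of the gathers above; element i of V1
   is written to ADDR + INDEX[i] * SCALE for every set bit of MASK.
   Hypothetical helper writing selected lanes into a float table.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__example_scatter_table_ps (float *__Table, __mmask8 __K, __m256i __Idx,
			    __m256 __Vals)
{
  /* Scale 4 matches sizeof (float); lanes with a clear bit in __K are
     not stored.  */
  _mm256_mask_i32scatter_ps (__Table, __K, __Idx, __Vals, 4);
}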
13136 #define _mm256_mask_shuffle_epi32(W, U, X, C) \
13137 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
13138 (__v8si)(__m256i)(W), \
13139 (__mmask8)(U)))
13141 #define _mm256_maskz_shuffle_epi32(U, X, C) \
13142 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
13143 (__v8si)(__m256i)_mm256_setzero_si256(), \
13144 (__mmask8)(U)))
13146 #define _mm_mask_shuffle_epi32(W, U, X, C) \
13147 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
13148 (__v4si)(__m128i)(W), \
13149 (__mmask8)(U)))
13151 #define _mm_maskz_shuffle_epi32(U, X, C) \
13152 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
13153 (__v4si)(__m128i)_mm_setzero_si128 (), \
13154 (__mmask8)(U)))
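/* Bitwise rotates by an immediate count (vprolq/vprorq/vprold/vprord).
   Each element of A is rotated by B bits; (__mmask8)-1 requests the
   unmasked form.  For example, _mm256_rol_epi32 (v, 8) rotates every
   32-bit element of v left by 8 bits.  */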
13156 #define _mm256_rol_epi64(A, B) \
13157 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13158 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13159 (__mmask8)-1))
13161 #define _mm256_mask_rol_epi64(W, U, A, B) \
13162 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13163 (__v4di)(__m256i)(W), \
13164 (__mmask8)(U)))
13166 #define _mm256_maskz_rol_epi64(U, A, B) \
13167 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13168 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13169 (__mmask8)(U)))
13171 #define _mm_rol_epi64(A, B) \
13172 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13173 (__v2di)(__m128i)_mm_setzero_di(), \
13174 (__mmask8)-1))
13176 #define _mm_mask_rol_epi64(W, U, A, B) \
13177 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13178 (__v2di)(__m128i)(W), \
13179 (__mmask8)(U)))
13181 #define _mm_maskz_rol_epi64(U, A, B) \
13182 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13183 (__v2di)(__m128i)_mm_setzero_di(), \
13184 (__mmask8)(U)))
13186 #define _mm256_ror_epi64(A, B) \
13187 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13188 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13189 (__mmask8)-1))
13191 #define _mm256_mask_ror_epi64(W, U, A, B) \
13192 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13193 (__v4di)(__m256i)(W), \
13194 (__mmask8)(U)))
13196 #define _mm256_maskz_ror_epi64(U, A, B) \
13197 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
13198 (__v4di)(__m256i)_mm256_setzero_si256 (),\
13199 (__mmask8)(U)))
13201 #define _mm_ror_epi64(A, B) \
13202 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13203 (__v2di)(__m128i)_mm_setzero_di(), \
13204 (__mmask8)-1))
13206 #define _mm_mask_ror_epi64(W, U, A, B) \
13207 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13208 (__v2di)(__m128i)(W), \
13209 (__mmask8)(U)))
13211 #define _mm_maskz_ror_epi64(U, A, B) \
13212 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
13213 (__v2di)(__m128i)_mm_setzero_di(), \
13214 (__mmask8)(U)))
13216 #define _mm256_rol_epi32(A, B) \
13217 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
13218 (__v8si)(__m256i)_mm256_setzero_si256(),\
13219 (__mmask8)-1))
13221 #define _mm256_mask_rol_epi32(W, U, A, B) \
13222 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
13223 (__v8si)(__m256i)(W), \
13224 (__mmask8)(U)))
13226 #define _mm256_maskz_rol_epi32(U, A, B) \
13227 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
13228 (__v8si)(__m256i)_mm256_setzero_si256(),\
13229 (__mmask8)(U)))
13231 #define _mm_rol_epi32(A, B) \
13232 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
13233 (__v4si)(__m128i)_mm_setzero_si128 (), \
13234 (__mmask8)-1))
13236 #define _mm_mask_rol_epi32(W, U, A, B) \
13237 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
13238 (__v4si)(__m128i)(W), \
13239 (__mmask8)(U)))
13241 #define _mm_maskz_rol_epi32(U, A, B) \
13242 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
13243 (__v4si)(__m128i)_mm_setzero_si128 (), \
13244 (__mmask8)(U)))
13246 #define _mm256_ror_epi32(A, B) \
13247 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13248 (__v8si)(__m256i)_mm256_setzero_si256(),\
13249 (__mmask8)-1))
13251 #define _mm256_mask_ror_epi32(W, U, A, B) \
13252 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13253 (__v8si)(__m256i)(W), \
13254 (__mmask8)(U)))
13256 #define _mm256_maskz_ror_epi32(U, A, B) \
13257 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13258 (__v8si)(__m256i)_mm256_setzero_si256(),\
13259 (__mmask8)(U)))
13261 #define _mm_ror_epi32(A, B) \
13262 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13263 (__v4si)(__m128i)_mm_setzero_si128 (), \
13264 (__mmask8)-1))
13266 #define _mm_mask_ror_epi32(W, U, A, B) \
13267 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13268 (__v4si)(__m128i)(W), \
13269 (__mmask8)(U)))
13271 #define _mm_maskz_ror_epi32(U, A, B) \
13272 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13273 (__v4si)(__m128i)_mm_setzero_si128 (), \
13274 (__mmask8)(U)))
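/* Concatenate-and-shift (valignd/valignq): the two sources are treated
   as one double-width vector, shifted right by C elements, and the low
   half is kept.  */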
13276 #define _mm256_alignr_epi32(X, Y, C) \
13277 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13278 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))
13280 #define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
13281 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13282 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
13284 #define _mm256_maskz_alignr_epi32(U, X, Y, C) \
13285 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
13286 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
13287 (__mmask8)(U)))
13289 #define _mm256_alignr_epi64(X, Y, C) \
13290 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13291 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))
13293 #define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
13294 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13295 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
13297 #define _mm256_maskz_alignr_epi64(U, X, Y, C) \
13298 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
13299 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
13300 (__mmask8)(U)))
13302 #define _mm_alignr_epi32(X, Y, C) \
13303 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
13304 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))
13306 #define _mm_mask_alignr_epi32(W, U, X, Y, C) \
13307 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
13308 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))
13310 #define _mm_maskz_alignr_epi32(U, X, Y, C) \
13311 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
13312 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
13313 (__mmask8)(U)))
13315 #define _mm_alignr_epi64(X, Y, C) \
13316 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
13317 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
13319 #define _mm_mask_alignr_epi64(W, U, X, Y, C) \
13320 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
13321 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
13323 #define _mm_maskz_alignr_epi64(U, X, Y, C) \
13324 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
13325 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
13326 (__mmask8)(U)))
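/* Masked float-to-half conversions (vcvtps2ph).  I is the imm8
   rounding control, e.g. _MM_FROUND_CUR_DIRECTION to round according
   to MXCSR.  */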
13328 #define _mm_mask_cvtps_ph(W, U, A, I) \
13329 ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
13330 (__v8hi)(__m128i) (W), (__mmask8) (U)))
13332 #define _mm_maskz_cvtps_ph(U, A, I) \
13333 ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
13334 (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
13336 #define _mm256_mask_cvtps_ph(W, U, A, I) \
13337 ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
13338 (__v8hi)(__m128i) (W), (__mmask8) (U)))
13340 #define _mm256_maskz_cvtps_ph(U, A, I) \
13341 ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
13342 (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
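/* Arithmetic right shifts by an immediate count.  The epi64 forms
   (vpsraq) have no AVX2 counterpart and are new with AVX-512.  */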
13344 #define _mm256_mask_srai_epi32(W, U, A, B) \
13345 ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
13346 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
13348 #define _mm256_maskz_srai_epi32(U, A, B) \
13349 ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
13350 (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
13352 #define _mm_mask_srai_epi32(W, U, A, B) \
13353 ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
13354 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
13356 #define _mm_maskz_srai_epi32(U, A, B) \
13357 ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
13358 (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
13360 #define _mm256_srai_epi64(A, B) \
13361 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
13362 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
13364 #define _mm256_mask_srai_epi64(W, U, A, B) \
13365 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
13366 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
13368 #define _mm256_maskz_srai_epi64(U, A, B) \
13369 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
13370 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
13372 #define _mm_srai_epi64(A, B) \
13373 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
13374 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
13376 #define _mm_mask_srai_epi64(W, U, A, B) \
13377 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
13378 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
13380 #define _mm_maskz_srai_epi64(U, A, B) \
13381 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
13382 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
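/* Immediate permutes: the permutex_pd forms select 64-bit elements
   across the full 256-bit register (vpermpd), while the permute_pd/ps
   forms (vpermilpd/vpermilps) permute within each 128-bit lane.  */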
13384 #define _mm256_mask_permutex_pd(W, U, A, B) \
13385 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
13386 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
13388 #define _mm256_maskz_permutex_pd(U, A, B) \
13389 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
13390 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
13392 #define _mm256_mask_permute_pd(W, U, X, C) \
13393 ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
13394 (__v4df)(__m256d)(W), \
13395 (__mmask8)(U)))
13397 #define _mm256_maskz_permute_pd(U, X, C) \
13398 ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
13399 (__v4df)(__m256d)_mm256_setzero_pd(), \
13400 (__mmask8)(U)))
13402 #define _mm256_mask_permute_ps(W, U, X, C) \
13403 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
13404 (__v8sf)(__m256)(W), (__mmask8)(U)))
13406 #define _mm256_maskz_permute_ps(U, X, C) \
13407 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
13408 (__v8sf)(__m256)_mm256_setzero_ps(), \
13409 (__mmask8)(U)))
13411 #define _mm_mask_permute_pd(W, U, X, C) \
13412 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
13413 (__v2df)(__m128d)(W), (__mmask8)(U)))
13415 #define _mm_maskz_permute_pd(U, X, C) \
13416 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
13417 (__v2df)(__m128d)_mm_setzero_pd(), \
13418 (__mmask8)(U)))
13420 #define _mm_mask_permute_ps(W, U, X, C) \
13421 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
13422 (__v4sf)(__m128)(W), (__mmask8)(U)))
13424 #define _mm_maskz_permute_ps(U, X, C) \
13425 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
13426 (__v4sf)(__m128)_mm_setzero_ps(), \
13427 (__mmask8)(U)))
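/* Mask blends: for each element the corresponding mask bit selects
   between the two sources; a set bit takes the element from __W,
   a clear bit keeps the element from __A.  */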
13429 #define _mm256_mask_blend_pd(__U, __A, __W) \
13430 ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
13431 (__v4df) (__W), \
13432 (__mmask8) (__U)))
13434 #define _mm256_mask_blend_ps(__U, __A, __W) \
13435 ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
13436 (__v8sf) (__W), \
13437 (__mmask8) (__U)))
13439 #define _mm256_mask_blend_epi64(__U, __A, __W) \
13440 ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
13441 (__v4di) (__W), \
13442 (__mmask8) (__U)))
13444 #define _mm256_mask_blend_epi32(__U, __A, __W) \
13445 ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
13446 (__v8si) (__W), \
13447 (__mmask8) (__U)))
13449 #define _mm_mask_blend_pd(__U, __A, __W) \
13450 ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
13451 (__v2df) (__W), \
13452 (__mmask8) (__U)))
13454 #define _mm_mask_blend_ps(__U, __A, __W) \
13455 ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
13456 (__v4sf) (__W), \
13457 (__mmask8) (__U)))
13459 #define _mm_mask_blend_epi64(__U, __A, __W) \
13460 ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
13461 (__v2di) (__W), \
13462 (__mmask8) (__U)))
13464 #define _mm_mask_blend_epi32(__U, __A, __W) \
13465 ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
13466 (__v4si) (__W), \
13467 (__mmask8) (__U)))
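/* Compare-to-mask macros.  P is a predicate immediate: one of the
   _MM_CMPINT_* values for the integer forms, or a _CMP_* predicate
   (as for AVX vcmpps/vcmppd) for the ps/pd forms.  The mask_ variants
   only compare elements whose bit is set in M.  A minimal illustrative
   use (not part of this header's API), assuming -mavx512vl and
   <immintrin.h>:

     __mmask8 lt = _mm256_cmp_epi32_mask (a, b, _MM_CMPINT_LT);
     __m256i min = _mm256_mask_blend_epi32 (lt, b, a);

   which keeps the smaller signed element from each pair.  */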
13469 #define _mm256_cmp_epu32_mask(X, Y, P) \
13470 ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
13471 (__v8si)(__m256i)(Y), (int)(P),\
13472 (__mmask8)-1))
13474 #define _mm256_cmp_epi64_mask(X, Y, P) \
13475 ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
13476 (__v4di)(__m256i)(Y), (int)(P),\
13477 (__mmask8)-1))
13479 #define _mm256_cmp_epi32_mask(X, Y, P) \
13480 ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
13481 (__v8si)(__m256i)(Y), (int)(P),\
13482 (__mmask8)-1))
13484 #define _mm256_cmp_epu64_mask(X, Y, P) \
13485 ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
13486 (__v4di)(__m256i)(Y), (int)(P),\
13487 (__mmask8)-1))
13489 #define _mm256_cmp_pd_mask(X, Y, P) \
13490 ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
13491 (__v4df)(__m256d)(Y), (int)(P),\
13492 (__mmask8)-1))
13494 #define _mm256_cmp_ps_mask(X, Y, P) \
13495 ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
13496 (__v8sf)(__m256)(Y), (int)(P),\
13497 (__mmask8)-1))
13499 #define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
13500 ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
13501 (__v4di)(__m256i)(Y), (int)(P),\
13502 (__mmask8)(M)))
13504 #define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
13505 ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
13506 (__v8si)(__m256i)(Y), (int)(P),\
13507 (__mmask8)(M)))
13509 #define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
13510 ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
13511 (__v4di)(__m256i)(Y), (int)(P),\
13512 (__mmask8)(M)))
13514 #define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
13515 ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
13516 (__v8si)(__m256i)(Y), (int)(P),\
13517 (__mmask8)(M)))
13519 #define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
13520 ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
13521 (__v4df)(__m256d)(Y), (int)(P),\
13522 (__mmask8)(M)))
13524 #define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
13525 ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
13526 (__v8sf)(__m256)(Y), (int)(P),\
13527 (__mmask8)(M)))
13529 #define _mm_cmp_epi64_mask(X, Y, P) \
13530 ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
13531 (__v2di)(__m128i)(Y), (int)(P),\
13532 (__mmask8)-1))
13534 #define _mm_cmp_epi32_mask(X, Y, P) \
13535 ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
13536 (__v4si)(__m128i)(Y), (int)(P),\
13537 (__mmask8)-1))
13539 #define _mm_cmp_epu64_mask(X, Y, P) \
13540 ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
13541 (__v2di)(__m128i)(Y), (int)(P),\
13542 (__mmask8)-1))
13544 #define _mm_cmp_epu32_mask(X, Y, P) \
13545 ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
13546 (__v4si)(__m128i)(Y), (int)(P),\
13547 (__mmask8)-1))
13549 #define _mm_cmp_pd_mask(X, Y, P) \
13550 ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
13551 (__v2df)(__m128d)(Y), (int)(P),\
13552 (__mmask8)-1))
13554 #define _mm_cmp_ps_mask(X, Y, P) \
13555 ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
13556 (__v4sf)(__m128)(Y), (int)(P),\
13557 (__mmask8)-1))
13559 #define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
13560 ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
13561 (__v2di)(__m128i)(Y), (int)(P),\
13562 (__mmask8)(M)))
13564 #define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
13565 ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
13566 (__v4si)(__m128i)(Y), (int)(P),\
13567 (__mmask8)(M)))
13569 #define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
13570 ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
13571 (__v2di)(__m128i)(Y), (int)(P),\
13572 (__mmask8)(M)))
13574 #define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
13575 ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
13576 (__v4si)(__m128i)(Y), (int)(P),\
13577 (__mmask8)(M)))
13579 #define _mm_mask_cmp_pd_mask(M, X, Y, P) \
13580 ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
13581 (__v2df)(__m128d)(Y), (int)(P),\
13582 (__mmask8)(M)))
13584 #define _mm_mask_cmp_ps_mask(M, X, Y, P) \
13585 ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
13586 (__v4sf)(__m128)(Y), (int)(P),\
13587 (__mmask8)(M)))
13589 #endif
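/* _mm256_permutexvar_ps needs no new instruction: it is the AVX2
   vpermps intrinsic with the operand order swapped (the AVX-512 VL
   intrinsic takes the index vector first).  */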
13591 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
13593 #ifdef __DISABLE_AVX512VL__
13594 #undef __DISABLE_AVX512VL__
13595 #pragma GCC pop_options
13596 #endif /* __DISABLE_AVX512VL__ */
13598 #endif /* _AVX512VLINTRIN_H_INCLUDED */