Initial support for AVX-512{VL,BW,DQ}
[official-gcc.git] / gcc / config / i386 / avx512vlintrin.h
1 /* Copyright (C) 2014
2 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #ifndef _IMMINTRIN_H_INCLUDED
26 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
27 #endif
29 #ifndef _AVX512VLINTRIN_H_INCLUDED
30 #define _AVX512VLINTRIN_H_INCLUDED
32 /* Doesn't require the avx512vl target; it is also used in avx512dqintrin.h. */
33 extern __inline __m128i
34 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
35 _mm_setzero_di (void)
37 return __extension__ (__m128i)(__v2di){ 0, 0};
40 #ifndef __AVX512VL__
41 #pragma GCC push_options
42 #pragma GCC target("avx512vl")
43 #define __DISABLE_AVX512VL__
44 #endif /* __AVX512VL__ */
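/* Illustrative sketch, not part of the original header: when a translation
   unit is not built with -mavx512vl, the pragma above temporarily enables
   the ISA just for these definitions.  A caller can opt in per function
   with GCC's target attribute instead, e.g.

     __attribute__ ((target ("avx512f,avx512vl")))
     static __m256d
     masked_add (__m256d __a, __m256d __b)
     {
       return _mm256_maskz_add_pd (0x3, __a, __b);
     }

   (masked_add is a hypothetical example function, not a GCC intrinsic.)  */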
46 /* Internal data types for implementing the intrinsics. */
47 typedef unsigned int __mmask32;
49 extern __inline __m256d
50 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54 (__v4df) __W,
55 (__mmask8) __U);
58 extern __inline __m256d
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
62 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
63 (__v4df)
64 _mm256_setzero_pd (),
65 (__mmask8) __U);
68 extern __inline __m128d
69 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73 (__v2df) __W,
74 (__mmask8) __U);
77 extern __inline __m128d
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
81 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
82 (__v2df)
83 _mm_setzero_pd (),
84 (__mmask8) __U);
87 extern __inline __m256d
88 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92 (__v4df) __W,
93 (__mmask8) __U);
96 extern __inline __m256d
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
100 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
101 (__v4df)
102 _mm256_setzero_pd (),
103 (__mmask8) __U);
106 extern __inline __m128d
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111 (__v2df) __W,
112 (__mmask8) __U);
115 extern __inline __m128d
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
119 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
120 (__v2df)
121 _mm_setzero_pd (),
122 (__mmask8) __U);
125 extern __inline void
126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
129 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
130 (__v4df) __A,
131 (__mmask8) __U);
134 extern __inline void
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
138 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
139 (__v2df) __A,
140 (__mmask8) __U);
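/* Illustrative usage sketch (assumed example, not part of the original
   header).  Mask bit i controls element i: with mask 0x5 only lanes 0 and
   2 participate; _mm256_mask_load_pd keeps the other lanes from __W,
   _mm256_maskz_load_pd zeroes them, and the masked store leaves the
   untouched lanes of memory unchanged.  The aligned forms require the
   pointer to be 32-byte (256-bit) or 16-byte (128-bit) aligned.

     double src[4] __attribute__ ((aligned (32))) = { 1.0, 2.0, 3.0, 4.0 };
     double dst[4] __attribute__ ((aligned (32))) = { 0.0, 0.0, 0.0, 0.0 };
     __m256d keep = _mm256_set1_pd (-1.0);
     __m256d merged = _mm256_mask_load_pd (keep, 0x5, src);  // lanes 0..3 = { 1, -1, 3, -1 }
     __m256d zeroed = _mm256_maskz_load_pd (0x5, src);       // lanes 0..3 = { 1,  0, 3,  0 }
     _mm256_mask_store_pd (dst, 0x5, merged);                // dst = { 1, 0, 3, 0 }
*/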
143 extern __inline __m256
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf) __W,
149 (__mmask8) __U);
152 extern __inline __m256
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
156 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
157 (__v8sf)
158 _mm256_setzero_ps (),
159 (__mmask8) __U);
162 extern __inline __m128
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf) __W,
168 (__mmask8) __U);
171 extern __inline __m128
172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
175 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
176 (__v4sf)
177 _mm_setzero_ps (),
178 (__mmask8) __U);
181 extern __inline __m256
182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf) __W,
187 (__mmask8) __U);
190 extern __inline __m256
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
194 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
195 (__v8sf)
196 _mm256_setzero_ps (),
197 (__mmask8) __U);
200 extern __inline __m128
201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf) __W,
206 (__mmask8) __U);
209 extern __inline __m128
210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
211 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
213 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
214 (__v4sf)
215 _mm_setzero_ps (),
216 (__mmask8) __U);
219 extern __inline void
220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
223 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
224 (__v8sf) __A,
225 (__mmask8) __U);
228 extern __inline void
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
232 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
233 (__v4sf) __A,
234 (__mmask8) __U);
237 extern __inline __m256i
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di) __W,
243 (__mmask8) __U);
246 extern __inline __m256i
247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
248 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
250 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
251 (__v4di)
252 _mm256_setzero_si256 (),
253 (__mmask8) __U);
256 extern __inline __m128i
257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di) __W,
262 (__mmask8) __U);
265 extern __inline __m128i
266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
267 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
269 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
270 (__v2di)
271 _mm_setzero_di (),
272 (__mmask8) __U);
275 extern __inline __m256i
276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
277 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
279 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
280 (__v4di) __W,
281 (__mmask8)
282 __U);
285 extern __inline __m256i
286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
289 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
290 (__v4di)
291 _mm256_setzero_si256 (),
292 (__mmask8)
293 __U);
296 extern __inline __m128i
297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
300 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
301 (__v2di) __W,
302 (__mmask8)
303 __U);
306 extern __inline __m128i
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
310 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
311 (__v2di)
312 _mm_setzero_di (),
313 (__mmask8)
314 __U);
317 extern __inline void
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
321 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
322 (__v4di) __A,
323 (__mmask8) __U);
326 extern __inline void
327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
330 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
331 (__v2di) __A,
332 (__mmask8) __U);
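/* Illustrative sketch (assumed example, not part of the original header):
   the epi64 forms move whole 64-bit lanes under the mask, mapping to
   vmovdqa64.  With a 4-lane vector only the low 4 mask bits are used.

     __m256i a = _mm256_set_epi64x (40, 30, 20, 10);      // lanes 0..3 = 10, 20, 30, 40
     __m256i w = _mm256_set1_epi64x (-1);
     __m256i merged = _mm256_mask_mov_epi64 (w, 0x9, a);  // lanes 0..3 = { 10, -1, -1, 40 }
     __m256i zeroed = _mm256_maskz_mov_epi64 (0x9, a);    // lanes 0..3 = { 10,  0,  0, 40 }
*/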
335 extern __inline __m256i
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si) __W,
341 (__mmask8) __U);
344 extern __inline __m256i
345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
346 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
348 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
349 (__v8si)
350 _mm256_setzero_si256 (),
351 (__mmask8) __U);
354 extern __inline __m128i
355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si) __W,
360 (__mmask8) __U);
363 extern __inline __m128i
364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
365 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
367 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
368 (__v4si)
369 _mm_setzero_si128 (),
370 (__mmask8) __U);
373 extern __inline __m256i
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
377 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
378 (__v8si) __W,
379 (__mmask8)
380 __U);
383 extern __inline __m256i
384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
387 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
388 (__v8si)
389 _mm256_setzero_si256 (),
390 (__mmask8)
391 __U);
394 extern __inline __m128i
395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
396 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
398 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
399 (__v4si) __W,
400 (__mmask8)
401 __U);
404 extern __inline __m128i
405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
406 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
408 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
409 (__v4si)
410 _mm_setzero_si128 (),
411 (__mmask8)
412 __U);
415 extern __inline void
416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
419 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
420 (__v8si) __A,
421 (__mmask8) __U);
424 extern __inline void
425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
428 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
429 (__v4si) __A,
430 (__mmask8) __U);
433 extern __inline __m128i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm_setzero_hi (void)
437 return __extension__ (__m128i) (__v8hi) { 0, 0, 0, 0, 0, 0, 0, 0 };
442 extern __inline __m128d
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
446 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
447 (__v2df) __B,
448 (__v2df) __W,
449 (__mmask8) __U);
452 extern __inline __m128d
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
456 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
457 (__v2df) __B,
458 (__v2df)
459 _mm_setzero_pd (),
460 (__mmask8) __U);
463 extern __inline __m256d
464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
466 __m256d __B)
468 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
469 (__v4df) __B,
470 (__v4df) __W,
471 (__mmask8) __U);
474 extern __inline __m256d
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
478 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
479 (__v4df) __B,
480 (__v4df)
481 _mm256_setzero_pd (),
482 (__mmask8) __U);
485 extern __inline __m128
486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
487 _mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
489 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
490 (__v4sf) __B,
491 (__v4sf) __W,
492 (__mmask8) __U);
495 extern __inline __m128
496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
497 _mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
499 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
500 (__v4sf) __B,
501 (__v4sf)
502 _mm_setzero_ps (),
503 (__mmask8) __U);
506 extern __inline __m256
507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
508 _mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
510 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
511 (__v8sf) __B,
512 (__v8sf) __W,
513 (__mmask8) __U);
516 extern __inline __m256
517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518 _mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
520 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
521 (__v8sf) __B,
522 (__v8sf)
523 _mm256_setzero_ps (),
524 (__mmask8) __U);
527 extern __inline __m128d
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
529 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
531 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
532 (__v2df) __B,
533 (__v2df) __W,
534 (__mmask8) __U);
537 extern __inline __m128d
538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
541 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
542 (__v2df) __B,
543 (__v2df)
544 _mm_setzero_pd (),
545 (__mmask8) __U);
548 extern __inline __m256d
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
551 __m256d __B)
553 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
554 (__v4df) __B,
555 (__v4df) __W,
556 (__mmask8) __U);
559 extern __inline __m256d
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
563 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
564 (__v4df) __B,
565 (__v4df)
566 _mm256_setzero_pd (),
567 (__mmask8) __U);
570 extern __inline __m128
571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
572 _mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
574 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
575 (__v4sf) __B,
576 (__v4sf) __W,
577 (__mmask8) __U);
580 extern __inline __m128
581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
582 _mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
584 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
585 (__v4sf) __B,
586 (__v4sf)
587 _mm_setzero_ps (),
588 (__mmask8) __U);
591 extern __inline __m256
592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593 _mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
595 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
596 (__v8sf) __B,
597 (__v8sf) __W,
598 (__mmask8) __U);
601 extern __inline __m256
602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603 _mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
605 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
606 (__v8sf) __B,
607 (__v8sf)
608 _mm256_setzero_ps (),
609 (__mmask8) __U);
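/* Illustrative sketch (assumed example, not part of the original header):
   masked arithmetic computes the full-width operation and then blends.
   Lanes whose mask bit is clear take the pass-through value (__W) in the
   _mask_ forms and 0.0 in the _maskz_ forms, and raise no floating-point
   exceptions.

     __m128d a = _mm_set_pd (8.0, 4.0);           // lane 1 = 8.0, lane 0 = 4.0
     __m128d b = _mm_set_pd (1.0, 2.0);
     __m128d r = _mm_maskz_add_pd (0x1, a, b);    // lanes 0..1 = { 6.0, 0.0 }
     __m128d s = _mm_mask_sub_pd (a, 0x2, a, b);  // lanes 0..1 = { 4.0, 7.0 }
*/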
612 extern __inline __m256i
613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
614 _mm256_load_epi64 (void const *__P)
616 return *(__m256i *) __P;
619 extern __inline __m128i
620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
621 _mm_load_epi64 (void const *__P)
623 return *(__m128i *) __P;
626 extern __inline void
627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
628 _mm256_store_epi64 (void *__P, __m256i __A)
630 *(__m256i *) __P = __A;
633 extern __inline void
634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635 _mm_store_epi64 (void *__P, __m128i __A)
637 *(__m128i *) __P = __A;
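/* Illustrative note (assumed example, not part of the original header):
   the unmasked load/store_epi64 forms are plain aligned vector accesses;
   the pointer must be suitably aligned (32 bytes for __m256i, 16 bytes for
   __m128i), otherwise the loadu/storeu forms below should be used.

     long long q[4] __attribute__ ((aligned (32))) = { 1, 2, 3, 4 };
     __m256i v = _mm256_load_epi64 (q);
     _mm256_store_epi64 (q, v);
*/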
640 extern __inline __m256d
641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
642 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
644 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
645 (__v4df) __W,
646 (__mmask8) __U);
649 extern __inline __m256d
650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
653 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
654 (__v4df)
655 _mm256_setzero_pd (),
656 (__mmask8) __U);
659 extern __inline __m128d
660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
661 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
663 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
664 (__v2df) __W,
665 (__mmask8) __U);
668 extern __inline __m128d
669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
670 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
672 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
673 (__v2df)
674 _mm_setzero_pd (),
675 (__mmask8) __U);
678 extern __inline void
679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
680 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
682 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
683 (__v4df) __A,
684 (__mmask8) __U);
687 extern __inline void
688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
691 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
692 (__v2df) __A,
693 (__mmask8) __U);
696 extern __inline __m256
697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
698 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
700 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
701 (__v8sf) __W,
702 (__mmask8) __U);
705 extern __inline __m256
706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
707 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
709 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
710 (__v8sf)
711 _mm256_setzero_ps (),
712 (__mmask8) __U);
715 extern __inline __m128
716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
717 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
719 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
720 (__v4sf) __W,
721 (__mmask8) __U);
724 extern __inline __m128
725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
726 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
728 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
729 (__v4sf)
730 _mm_setzero_ps (),
731 (__mmask8) __U);
734 extern __inline void
735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
736 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
738 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
739 (__v8sf) __A,
740 (__mmask8) __U);
743 extern __inline void
744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
745 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
747 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
748 (__v4sf) __A,
749 (__mmask8) __U);
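/* Illustrative sketch (assumed example, not part of the original header):
   the loadu/storeu forms behave like the aligned load/store forms above
   but place no alignment requirement on the pointer (vmovups/vmovupd
   rather than vmovaps/vmovapd).

     float buf[9] = { 0 };
     __m256 v = _mm256_maskz_loadu_ps (0xff, buf + 1);  // unaligned source is fine
     _mm256_mask_storeu_ps (buf + 1, 0x0f, v);          // writes only the low 4 lanes
*/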
752 extern __inline __m256i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
756 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
757 (__v4di) __W,
758 (__mmask8) __U);
761 extern __inline __m256i
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
765 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
766 (__v4di)
767 _mm256_setzero_si256 (),
768 (__mmask8) __U);
771 extern __inline __m128i
772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
773 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
775 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
776 (__v2di) __W,
777 (__mmask8) __U);
780 extern __inline __m128i
781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
784 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
785 (__v2di)
786 _mm_setzero_di (),
787 (__mmask8) __U);
790 extern __inline void
791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
792 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
794 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
795 (__v4di) __A,
796 (__mmask8) __U);
799 extern __inline void
800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
803 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
804 (__v2di) __A,
805 (__mmask8) __U);
808 extern __inline __m256i
809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
810 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
812 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
813 (__v8si) __W,
814 (__mmask8) __U);
817 extern __inline __m256i
818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
819 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
821 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
822 (__v8si)
823 _mm256_setzero_si256 (),
824 (__mmask8) __U);
827 extern __inline __m128i
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
831 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
832 (__v4si) __W,
833 (__mmask8) __U);
836 extern __inline __m128i
837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
838 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
840 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
841 (__v4si)
842 _mm_setzero_si128 (),
843 (__mmask8) __U);
846 extern __inline void
847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
848 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
850 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
851 (__v8si) __A,
852 (__mmask8) __U);
855 extern __inline void
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
859 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
860 (__v4si) __A,
861 (__mmask8) __U);
864 extern __inline __m256i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
868 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
869 (__v8si) __W,
870 (__mmask8) __U);
873 extern __inline __m256i
874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
875 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
877 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
878 (__v8si)
879 _mm256_setzero_si256 (),
880 (__mmask8) __U);
883 extern __inline __m128i
884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
885 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
887 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
888 (__v4si) __W,
889 (__mmask8) __U);
892 extern __inline __m128i
893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
896 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
897 (__v4si)
898 _mm_setzero_si128 (),
899 (__mmask8) __U);
902 extern __inline __m256i
903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
904 _mm256_abs_epi64 (__m256i __A)
906 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
907 (__v4di)
908 _mm256_setzero_si256 (),
909 (__mmask8) -1);
912 extern __inline __m256i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
916 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
917 (__v4di) __W,
918 (__mmask8) __U);
921 extern __inline __m256i
922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
923 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
925 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
926 (__v4di)
927 _mm256_setzero_si256 (),
928 (__mmask8) __U);
931 extern __inline __m128i
932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
933 _mm_abs_epi64 (__m128i __A)
935 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
936 (__v2di)
937 _mm_setzero_di (),
938 (__mmask8) -1);
941 extern __inline __m128i
942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
943 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
945 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
946 (__v2di) __W,
947 (__mmask8) __U);
950 extern __inline __m128i
951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
952 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
954 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
955 (__v2di)
956 _mm_setzero_di (),
957 (__mmask8) __U);
960 extern __inline __m128i
961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
962 _mm256_cvtpd_epu32 (__m256d __A)
964 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
965 (__v4si)
966 _mm_setzero_si128 (),
967 (__mmask8) -1);
970 extern __inline __m128i
971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
972 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
974 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
975 (__v4si) __W,
976 (__mmask8) __U);
979 extern __inline __m128i
980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
983 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
984 (__v4si)
985 _mm_setzero_si128 (),
986 (__mmask8) __U);
989 extern __inline __m128i
990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
991 _mm_cvtpd_epu32 (__m128d __A)
993 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
994 (__v4si)
995 _mm_setzero_si128 (),
996 (__mmask8) -1);
999 extern __inline __m128i
1000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1001 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1003 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1004 (__v4si) __W,
1005 (__mmask8) __U);
1008 extern __inline __m128i
1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
1012 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1013 (__v4si)
1014 _mm_setzero_si128 (),
1015 (__mmask8) __U);
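/* Illustrative sketch (assumed example, not part of the original header):
   _mm256_cvtpd_epu32 converts four doubles to four unsigned 32-bit
   integers, rounding according to MXCSR (round-to-nearest-even by
   default), and packs them into a 128-bit result.  Values too large for
   the signed converters are representable here:

     __m256d d = _mm256_set_pd (4.5, 3.5, 2.5, 3000000000.0);
     __m128i u = _mm256_cvtpd_epu32 (d);  // lanes 0..3 = { 3000000000, 2, 4, 4 }
*/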
1018 extern __inline __m256i
1019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1020 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1022 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1023 (__v8si) __W,
1024 (__mmask8) __U);
1027 extern __inline __m256i
1028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1029 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1031 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1032 (__v8si)
1033 _mm256_setzero_si256 (),
1034 (__mmask8) __U);
1037 extern __inline __m128i
1038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1039 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1041 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1042 (__v4si) __W,
1043 (__mmask8) __U);
1046 extern __inline __m128i
1047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1048 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1050 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1051 (__v4si)
1052 _mm_setzero_si128 (),
1053 (__mmask8) __U);
1056 extern __inline __m256i
1057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1058 _mm256_cvttps_epu32 (__m256 __A)
1060 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1061 (__v8si)
1062 _mm256_setzero_si256 (),
1063 (__mmask8) -1);
1066 extern __inline __m256i
1067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1068 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1070 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1071 (__v8si) __W,
1072 (__mmask8) __U);
1075 extern __inline __m256i
1076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1079 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1080 (__v8si)
1081 _mm256_setzero_si256 (),
1082 (__mmask8) __U);
1085 extern __inline __m128i
1086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1087 _mm_cvttps_epu32 (__m128 __A)
1089 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1090 (__v4si)
1091 _mm_setzero_si128 (),
1092 (__mmask8) -1);
1095 extern __inline __m128i
1096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1097 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1099 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1100 (__v4si) __W,
1101 (__mmask8) __U);
1104 extern __inline __m128i
1105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1106 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1108 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1109 (__v4si)
1110 _mm_setzero_si128 (),
1111 (__mmask8) __U);
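/* Illustrative sketch (assumed example, not part of the original header):
   the cvtt* forms truncate toward zero instead of using the MXCSR
   rounding mode.

     __m128 f = _mm_set_ps (9.99f, 2.5f, 1.5f, 0.9f);
     __m128i t = _mm_cvttps_epu32 (f);             // lanes 0..3 = { 0, 1, 2, 9 }
     __m128i m = _mm_maskz_cvttps_epu32 (0x3, f);  // lanes 0..3 = { 0, 1, 0, 0 }
*/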
1114 extern __inline __m128i
1115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1116 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1118 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1119 (__v4si) __W,
1120 (__mmask8) __U);
1123 extern __inline __m128i
1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1127 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1128 (__v4si)
1129 _mm_setzero_si128 (),
1130 (__mmask8) __U);
1133 extern __inline __m128i
1134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1135 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1137 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1138 (__v4si) __W,
1139 (__mmask8) __U);
1142 extern __inline __m128i
1143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1146 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1147 (__v4si)
1148 _mm_setzero_si128 (),
1149 (__mmask8) __U);
1152 extern __inline __m128i
1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 _mm256_cvttpd_epu32 (__m256d __A)
1156 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1157 (__v4si)
1158 _mm_setzero_si128 (),
1159 (__mmask8) -1);
1162 extern __inline __m128i
1163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1164 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1166 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1167 (__v4si) __W,
1168 (__mmask8) __U);
1171 extern __inline __m128i
1172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1173 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1175 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1176 (__v4si)
1177 _mm_setzero_si128 (),
1178 (__mmask8) __U);
1181 extern __inline __m128i
1182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1183 _mm_cvttpd_epu32 (__m128d __A)
1185 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1186 (__v4si)
1187 _mm_setzero_si128 (),
1188 (__mmask8) -1);
1191 extern __inline __m128i
1192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1193 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1196 (__v4si) __W,
1197 (__mmask8) __U);
1200 extern __inline __m128i
1201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1202 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1204 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1205 (__v4si)
1206 _mm_setzero_si128 (),
1207 (__mmask8) __U);
1210 extern __inline __m128i
1211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1212 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1214 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1215 (__v4si) __W,
1216 (__mmask8) __U);
1219 extern __inline __m128i
1220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1221 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1223 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1224 (__v4si)
1225 _mm_setzero_si128 (),
1226 (__mmask8) __U);
1229 extern __inline __m128i
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1233 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1234 (__v4si) __W,
1235 (__mmask8) __U);
1238 extern __inline __m128i
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1242 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1243 (__v4si)
1244 _mm_setzero_si128 (),
1245 (__mmask8) __U);
1248 extern __inline __m256d
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1252 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1253 (__v4df) __W,
1254 (__mmask8) __U);
1257 extern __inline __m256d
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1261 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1262 (__v4df)
1263 _mm256_setzero_pd (),
1264 (__mmask8) __U);
1267 extern __inline __m128d
1268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1269 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1271 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1272 (__v2df) __W,
1273 (__mmask8) __U);
1276 extern __inline __m128d
1277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1278 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1280 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1281 (__v2df)
1282 _mm_setzero_pd (),
1283 (__mmask8) __U);
1286 extern __inline __m256d
1287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1288 _mm256_cvtepu32_pd (__m128i __A)
1290 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1291 (__v4df)
1292 _mm256_setzero_pd (),
1293 (__mmask8) -1);
1296 extern __inline __m256d
1297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1298 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1300 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1301 (__v4df) __W,
1302 (__mmask8) __U);
1305 extern __inline __m256d
1306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1307 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1309 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1310 (__v4df)
1311 _mm256_setzero_pd (),
1312 (__mmask8) __U);
1315 extern __inline __m128d
1316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1317 _mm_cvtepu32_pd (__m128i __A)
1319 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1320 (__v2df)
1321 _mm_setzero_pd (),
1322 (__mmask8) -1);
1325 extern __inline __m128d
1326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1327 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1329 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1330 (__v2df) __W,
1331 (__mmask8) __U);
1334 extern __inline __m128d
1335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1336 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1338 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1339 (__v2df)
1340 _mm_setzero_pd (),
1341 (__mmask8) __U);
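/* Illustrative sketch (assumed example, not part of the original header):
   the epu32 converters treat the source lanes as unsigned, so a bit
   pattern of 0xFFFFFFFF becomes 4294967295.0 rather than -1.0.

     __m128i x = _mm_set_epi32 (0, 0, 0, -1);  // lane 0 = 0xFFFFFFFF
     __m128d u = _mm_cvtepu32_pd (x);          // { 4294967295.0, 0.0 }
     __m128d s = _mm_cvtepi32_pd (x);          // { -1.0, 0.0 } for comparison
*/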
1344 extern __inline __m256
1345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1346 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1348 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1349 (__v8sf) __W,
1350 (__mmask8) __U);
1353 extern __inline __m256
1354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
1357 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1358 (__v8sf)
1359 _mm256_setzero_ps (),
1360 (__mmask8) __U);
1363 extern __inline __m128
1364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1365 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1367 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1368 (__v4sf) __W,
1369 (__mmask8) __U);
1372 extern __inline __m128
1373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1374 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
1376 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1377 (__v4sf)
1378 _mm_setzero_ps (),
1379 (__mmask8) __U);
1382 extern __inline __m256
1383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1384 _mm256_cvtepu32_ps (__m256i __A)
1386 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1387 (__v8sf)
1388 _mm256_setzero_ps (),
1389 (__mmask8) -1);
1392 extern __inline __m256
1393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1394 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1396 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1397 (__v8sf) __W,
1398 (__mmask8) __U);
1401 extern __inline __m256
1402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1403 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1405 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1406 (__v8sf)
1407 _mm256_setzero_ps (),
1408 (__mmask8) __U);
1411 extern __inline __m128
1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1413 _mm_cvtepu32_ps (__m128i __A)
1415 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1416 (__v4sf)
1417 _mm_setzero_ps (),
1418 (__mmask8) -1);
1421 extern __inline __m128
1422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1423 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1425 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1426 (__v4sf) __W,
1427 (__mmask8) __U);
1430 extern __inline __m128
1431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1434 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1435 (__v4sf)
1436 _mm_setzero_ps (),
1437 (__mmask8) __U);
1440 extern __inline __m256d
1441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1442 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1444 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1445 (__v4df) __W,
1446 (__mmask8) __U);
1449 extern __inline __m256d
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1453 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1454 (__v4df)
1455 _mm256_setzero_pd (),
1456 (__mmask8) __U);
1459 extern __inline __m128d
1460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1463 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1464 (__v2df) __W,
1465 (__mmask8) __U);
1468 extern __inline __m128d
1469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1472 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1473 (__v2df)
1474 _mm_setzero_pd (),
1475 (__mmask8) __U);
1478 extern __inline __m128i
1479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480 _mm_cvtepi32_epi8 (__m128i __A)
1482 __v16qi __O;
1483 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, __O,
1484 (__mmask8) -1);
1487 extern __inline void
1488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1491 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1494 extern __inline __m128i
1495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1496 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1498 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1499 (__v16qi) __O, __M);
1502 extern __inline __m128i
1503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1504 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1506 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1507 (__v16qi)
1508 _mm_setzero_si128 (),
1509 __M);
1512 extern __inline __m128i
1513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1514 _mm256_cvtepi32_epi8 (__m256i __A)
1516 __v16qi __O;
1517 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, __O,
1518 (__mmask8) -1);
1521 extern __inline __m128i
1522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1525 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1526 (__v16qi) __O, __M);
1529 extern __inline void
1530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1531 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1533 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1536 extern __inline __m128i
1537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1540 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1541 (__v16qi)
1542 _mm_setzero_si128 (),
1543 __M);
1546 extern __inline __m128i
1547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1548 _mm_cvtsepi32_epi8 (__m128i __A)
1550 __v16qi __O;
1551 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, __O,
1552 (__mmask8) -1);
1555 extern __inline void
1556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1559 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1562 extern __inline __m128i
1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1566 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1567 (__v16qi) __O, __M);
1570 extern __inline __m128i
1571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1572 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1574 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1575 (__v16qi)
1576 _mm_setzero_si128 (),
1577 __M);
1580 extern __inline __m128i
1581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1582 _mm256_cvtsepi32_epi8 (__m256i __A)
1584 __v16qi __O;
1585 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, __O,
1586 (__mmask8) -1);
1589 extern __inline void
1590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1593 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1596 extern __inline __m128i
1597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1600 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1601 (__v16qi) __O, __M);
1604 extern __inline __m128i
1605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1606 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1608 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1609 (__v16qi)
1610 _mm_setzero_si128 (),
1611 __M);
1614 extern __inline __m128i
1615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1616 _mm_cvtusepi32_epi8 (__m128i __A)
1618 __v16qi __O;
1619 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, __O,
1620 (__mmask8) -1);
1623 extern __inline void
1624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1625 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1627 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1630 extern __inline __m128i
1631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1632 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1634 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1635 (__v16qi) __O,
1636 __M);
1639 extern __inline __m128i
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1643 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1644 (__v16qi)
1645 _mm_setzero_si128 (),
1646 __M);
1649 extern __inline __m128i
1650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1651 _mm256_cvtusepi32_epi8 (__m256i __A)
1653 __v16qi __O;
1654 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, __O,
1655 (__mmask8) -1);
1658 extern __inline void
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1662 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1665 extern __inline __m128i
1666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1667 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1669 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1670 (__v16qi) __O,
1671 __M);
1674 extern __inline __m128i
1675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1676 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1678 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1679 (__v16qi)
1680 _mm_setzero_si128 (),
1681 __M);
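/* Illustrative sketch (assumed example, not part of the original header):
   the three epi32 -> epi8 down-converters differ in how they handle values
   that do not fit in a byte: cvtepi32_epi8 truncates (keeps the low 8
   bits), cvtsepi32_epi8 saturates as signed, and cvtusepi32_epi8 treats
   the source as unsigned and saturates.  Only the low 4 result bytes hold
   converted values here.

     __m128i v = _mm_set_epi32 (40000, 300, 200, 100);  // lanes 0..3 = 100, 200, 300, 40000
     __m128i t = _mm_cvtepi32_epi8 (v);   // low bytes = 0x64, 0xC8, 0x2C, 0x40
     __m128i s = _mm_cvtsepi32_epi8 (v);  // low bytes = 0x64, 0x7F, 0x7F, 0x7F
     __m128i u = _mm_cvtusepi32_epi8 (v); // low bytes = 0x64, 0xC8, 0xFF, 0xFF
*/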
1684 extern __inline __m128i
1685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1686 _mm_cvtepi32_epi16 (__m128i __A)
1688 __v8hi __O;
1689 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, __O,
1690 (__mmask8) -1);
1693 extern __inline void
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1697 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1700 extern __inline __m128i
1701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1702 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1704 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1705 (__v8hi) __O, __M);
1708 extern __inline __m128i
1709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1710 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1712 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1713 (__v8hi)
1714 _mm_setzero_si128 (),
1715 __M);
1718 extern __inline __m128i
1719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 _mm256_cvtepi32_epi16 (__m256i __A)
1722 __v8hi __O;
1723 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, __O,
1724 (__mmask8) -1);
1727 extern __inline void
1728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1729 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1731 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1734 extern __inline __m128i
1735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1736 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1738 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1739 (__v8hi) __O, __M);
1742 extern __inline __m128i
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1746 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1747 (__v8hi)
1748 _mm_setzero_si128 (),
1749 __M);
1752 extern __inline __m128i
1753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754 _mm_cvtsepi32_epi16 (__m128i __A)
1756 __v8hi __O;
1757 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, __O,
1758 (__mmask8) -1);
1761 extern __inline void
1762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1763 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1765 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1768 extern __inline __m128i
1769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1770 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1772 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1773 (__v8hi) __O, __M);
1776 extern __inline __m128i
1777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1780 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1781 (__v8hi)
1782 _mm_setzero_si128 (),
1783 __M);
1786 extern __inline __m128i
1787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1788 _mm256_cvtsepi32_epi16 (__m256i __A)
1790 __v8hi __O;
1791 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, __O,
1792 (__mmask8) -1);
1795 extern __inline void
1796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1797 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1799 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1802 extern __inline __m128i
1803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1804 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1806 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1807 (__v8hi) __O, __M);
1810 extern __inline __m128i
1811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1812 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1814 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1815 (__v8hi)
1816 _mm_setzero_si128 (),
1817 __M);
1820 extern __inline __m128i
1821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1822 _mm_cvtusepi32_epi16 (__m128i __A)
1824 __v8hi __O;
1825 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, __O,
1826 (__mmask8) -1);
1829 extern __inline void
1830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1831 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1833 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1836 extern __inline __m128i
1837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1838 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1840 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1841 (__v8hi) __O, __M);
1844 extern __inline __m128i
1845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1848 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1849 (__v8hi)
1850 _mm_setzero_si128 (),
1851 __M);
1854 extern __inline __m128i
1855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856 _mm256_cvtusepi32_epi16 (__m256i __A)
1858 __v8hi __O;
1859 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, __O,
1860 (__mmask8) -1);
1863 extern __inline void
1864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1865 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1867 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1870 extern __inline __m128i
1871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1872 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1874 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1875 (__v8hi) __O, __M);
1878 extern __inline __m128i
1879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1880 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1882 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1883 (__v8hi)
1884 _mm_setzero_si128 (),
1885 __M);
1888 extern __inline __m128i
1889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1890 _mm_cvtepi64_epi8 (__m128i __A)
1892 __v16qi __O;
1893 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, __O,
1894 (__mmask8) -1);
1897 extern __inline void
1898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1901 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1904 extern __inline __m128i
1905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1908 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1909 (__v16qi) __O, __M);
1912 extern __inline __m128i
1913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1916 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1917 (__v16qi)
1918 _mm_setzero_si128 (),
1919 __M);
1922 extern __inline __m128i
1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924 _mm256_cvtepi64_epi8 (__m256i __A)
1926 __v16qi __O;
1927 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, __O,
1928 (__mmask8) -1);
1931 extern __inline void
1932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1933 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1935 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1938 extern __inline __m128i
1939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1940 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1942 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1943 (__v16qi) __O, __M);
1946 extern __inline __m128i
1947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1948 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1950 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1951 (__v16qi)
1952 _mm_setzero_si128 (),
1953 __M);
1956 extern __inline __m128i
1957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958 _mm_cvtsepi64_epi8 (__m128i __A)
1960 __v16qi __O;
1961 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, __O,
1962 (__mmask8) -1);
1965 extern __inline void
1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1967 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1969 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1972 extern __inline __m128i
1973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1974 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1976 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1977 (__v16qi) __O, __M);
1980 extern __inline __m128i
1981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1982 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1984 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1985 (__v16qi)
1986 _mm_setzero_si128 (),
1987 __M);
1990 extern __inline __m128i
1991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1992 _mm256_cvtsepi64_epi8 (__m256i __A)
1994 __v16qi __O;
1995 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, __O,
1996 (__mmask8) -1);
1999 extern __inline void
2000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2003 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2006 extern __inline __m128i
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2010 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2011 (__v16qi) __O, __M);
2014 extern __inline __m128i
2015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2016 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2018 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2019 (__v16qi)
2020 _mm_setzero_si128 (),
2021 __M);
2024 extern __inline __m128i
2025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2026 _mm_cvtusepi64_epi8 (__m128i __A)
2028 __v16qi __O;
2029 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, __O,
2030 (__mmask8) -1);
2033 extern __inline void
2034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2035 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2037 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2040 extern __inline __m128i
2041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2042 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2044 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2045 (__v16qi) __O,
2046 __M);
2049 extern __inline __m128i
2050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2051 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2053 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2054 (__v16qi)
2055 _mm_setzero_si128 (),
2056 __M);
2059 extern __inline __m128i
2060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2061 _mm256_cvtusepi64_epi8 (__m256i __A)
2063 __v16qi __O;
2064 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, __O,
2065 (__mmask8) -1);
2068 extern __inline void
2069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2070 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2072 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2075 extern __inline __m128i
2076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2077 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2079 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2080 (__v16qi) __O,
2081 __M);
2084 extern __inline __m128i
2085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2088 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2089 (__v16qi)
2090 _mm_setzero_si128 (),
2091 __M);
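/* Down-convert packed 64-bit integers to 16-bit elements: truncating
   (VPMOVQW), signed-saturating (VPMOVSQW) and unsigned-saturating
   (VPMOVUSQW) variants.  */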
2094 extern __inline __m128i
2095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2096 _mm_cvtepi64_epi16 (__m128i __A)
2098 __v8hi __O;
2099 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, __O,
2100 (__mmask8) -1);
2103 extern __inline void
2104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2105 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2107 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2110 extern __inline __m128i
2111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2112 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2114 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2115 (__v8hi) __O, __M);
2118 extern __inline __m128i
2119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2120 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2122 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2123 (__v8hi)
2124 _mm_setzero_si128 (),
2125 __M);
2128 extern __inline __m128i
2129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2130 _mm256_cvtepi64_epi16 (__m256i __A)
2132 __v8hi __O;
2133 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, __O,
2134 (__mmask8) -1);
2137 extern __inline void
2138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2139 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2141 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2144 extern __inline __m128i
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2148 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2149 (__v8hi) __O, __M);
2152 extern __inline __m128i
2153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2156 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2157 (__v8hi)
2158 _mm_setzero_si128 (),
2159 __M);
2162 extern __inline __m128i
2163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164 _mm_cvtsepi64_epi16 (__m128i __A)
2166 __v8hi __O;
2167 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, __O,
2168 (__mmask8) -1);
2171 extern __inline void
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2175 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2178 extern __inline __m128i
2179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2182 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2183 (__v8hi) __O, __M);
2186 extern __inline __m128i
2187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2188 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2190 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2191 (__v8hi)
2192 _mm_setzero_si128 (),
2193 __M);
2196 extern __inline __m128i
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm256_cvtsepi64_epi16 (__m256i __A)
2200 __v8hi __O;
2201 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, __O,
2202 (__mmask8) -1);
2205 extern __inline void
2206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2207 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2209 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2212 extern __inline __m128i
2213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2216 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2217 (__v8hi) __O, __M);
2220 extern __inline __m128i
2221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2222 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2224 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2225 (__v8hi)
2226 _mm_setzero_si128 (),
2227 __M);
2230 extern __inline __m128i
2231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2232 _mm_cvtusepi64_epi16 (__m128i __A)
2234 __v8hi __O;
2235 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, __O,
2236 (__mmask8) -1);
2239 extern __inline void
2240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2243 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2246 extern __inline __m128i
2247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2248 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2250 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2251 (__v8hi) __O, __M);
2254 extern __inline __m128i
2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2256 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2258 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2259 (__v8hi)
2260 _mm_setzero_si128 (),
2261 __M);
2264 extern __inline __m128i
2265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2266 _mm256_cvtusepi64_epi16 (__m256i __A)
2268 __v8hi __O;
2269 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, __O,
2270 (__mmask8) -1);
2273 extern __inline void
2274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2275 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2277 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2280 extern __inline __m128i
2281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2282 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2284 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2285 (__v8hi) __O, __M);
2288 extern __inline __m128i
2289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2292 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2293 (__v8hi)
2294 _mm_setzero_si128 (),
2295 __M);
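/* Down-convert packed 64-bit integers to 32-bit elements: truncating
   (VPMOVQD), signed-saturating (VPMOVSQD) and unsigned-saturating
   (VPMOVUSQD) variants.  */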
2298 extern __inline __m128i
2299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2300 _mm_cvtepi64_epi32 (__m128i __A)
2302 __v4si __O;
2303 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, __O,
2304 (__mmask8) -1);
2307 extern __inline void
2308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2309 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2311 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2314 extern __inline __m128i
2315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2316 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2318 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2319 (__v4si) __O, __M);
2322 extern __inline __m128i
2323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2326 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2327 (__v4si)
2328 _mm_setzero_si128 (),
2329 __M);
2332 extern __inline __m128i
2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334 _mm256_cvtepi64_epi32 (__m256i __A)
2336 __v4si __O;
2337 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, __O,
2338 (__mmask8) -1);
2341 extern __inline void
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2345 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2348 extern __inline __m128i
2349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2350 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2352 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2353 (__v4si) __O, __M);
2356 extern __inline __m128i
2357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2358 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2360 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2361 (__v4si)
2362 _mm_setzero_si128 (),
2363 __M);
2366 extern __inline __m128i
2367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2368 _mm_cvtsepi64_epi32 (__m128i __A)
2370 __v4si __O;
2371 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, __O,
2372 (__mmask8) -1);
2375 extern __inline void
2376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2377 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2379 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2382 extern __inline __m128i
2383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2384 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2386 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2387 (__v4si) __O, __M);
2390 extern __inline __m128i
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2394 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2395 (__v4si)
2396 _mm_setzero_si128 (),
2397 __M);
2400 extern __inline __m128i
2401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2402 _mm256_cvtsepi64_epi32 (__m256i __A)
2404 __v4si __O;
2405 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, __O,
2406 (__mmask8) -1);
2409 extern __inline void
2410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2411 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2413 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2416 extern __inline __m128i
2417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2418 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2420 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2421 (__v4si) __O, __M);
2424 extern __inline __m128i
2425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2426 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2428 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2429 (__v4si)
2430 _mm_setzero_si128 (),
2431 __M);
2434 extern __inline __m128i
2435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2436 _mm_cvtusepi64_epi32 (__m128i __A)
2438 __v4si __O;
2439 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, __O,
2440 (__mmask8) -1);
2443 extern __inline void
2444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2447 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2450 extern __inline __m128i
2451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2454 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2455 (__v4si) __O, __M);
2458 extern __inline __m128i
2459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2462 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2463 (__v4si)
2464 _mm_setzero_si128 (),
2465 __M);
2468 extern __inline __m128i
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 _mm256_cvtusepi64_epi32 (__m256i __A)
2472 __v4si __O;
2473 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, __O,
2474 (__mmask8) -1);
2477 extern __inline void
2478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2481 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2484 extern __inline __m128i
2485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2486 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2488 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2489 (__v4si) __O, __M);
2492 extern __inline __m128i
2493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2494 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2496 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2497 (__v4si)
2498 _mm_setzero_si128 (),
2499 __M);
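/* Masked broadcasts: replicate a scalar element or a 128-bit lane across
   the destination (VBROADCASTSS/SD, VPBROADCASTD/Q, VBROADCASTF32X4,
   VBROADCASTI32X4).  */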
2502 extern __inline __m256
2503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2504 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2506 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2507 (__v8sf) __O,
2508 __M);
2511 extern __inline __m256
2512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2513 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2515 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2516 (__v8sf)
2517 _mm256_setzero_ps (),
2518 __M);
2521 extern __inline __m128
2522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2523 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2525 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2526 (__v4sf) __O,
2527 __M);
2530 extern __inline __m128
2531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2532 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2534 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2535 (__v4sf)
2536 _mm_setzero_ps (),
2537 __M);
2540 extern __inline __m256d
2541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2544 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2545 (__v4df) __O,
2546 __M);
2549 extern __inline __m256d
2550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2551 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2553 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2554 (__v4df)
2555 _mm256_setzero_pd (),
2556 __M);
2559 extern __inline __m256i
2560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2561 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2563 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2564 (__v8si) __O,
2565 __M);
2568 extern __inline __m256i
2569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2570 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2572 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2573 (__v8si)
2574 _mm256_setzero_si256 (),
2575 __M);
2578 extern __inline __m256i
2579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2580 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2582 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2583 __M);
2586 extern __inline __m256i
2587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2588 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2590 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2591 (__v8si)
2592 _mm256_setzero_si256 (),
2593 __M);
2596 extern __inline __m128i
2597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2598 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2600 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2601 (__v4si) __O,
2602 __M);
2605 extern __inline __m128i
2606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2607 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2609 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2610 (__v4si)
2611 _mm_setzero_si128 (),
2612 __M);
2615 extern __inline __m128i
2616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2617 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2619 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2620 __M);
2623 extern __inline __m128i
2624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2625 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2627 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2628 (__v4si)
2629 _mm_setzero_si128 (),
2630 __M);
2633 extern __inline __m256i
2634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2635 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2637 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2638 (__v4di) __O,
2639 __M);
2642 extern __inline __m256i
2643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2646 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2647 (__v4di)
2648 _mm256_setzero_si256 (),
2649 __M);
2652 extern __inline __m256i
2653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2654 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2656 #ifdef TARGET_64BIT
2657 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2658 __M);
2659 #else
2660 return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
2661 __M);
2662 #endif
2665 extern __inline __m256i
2666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2669 #ifdef TARGET_64BIT
2670 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2671 (__v4di)
2672 _mm256_setzero_si256 (),
2673 __M);
2674 #else
2675 return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
2676 (__v4di)
2677 _mm256_setzero_si256 (),
2678 __M);
2679 #endif
2682 extern __inline __m128i
2683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2684 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2686 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2687 (__v2di) __O,
2688 __M);
2691 extern __inline __m128i
2692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2693 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2695 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2696 (__v2di)
2697 _mm_setzero_si128 (),
2698 __M);
2701 extern __inline __m128i
2702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2703 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2705 #ifdef TARGET_64BIT
2706 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2707 __M);
2708 #else
2709 return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
2710 __M);
2711 #endif
2714 extern __inline __m128i
2715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2716 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2718 #ifdef TARGET_64BIT
2719 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2720 (__v2di)
2721 _mm_setzero_si128 (),
2722 __M);
2723 #else
2724 return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
2725 (__v2di)
2726 _mm_setzero_si128 (),
2727 __M);
2728 #endif
2731 extern __inline __m256
2732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2733 _mm256_broadcast_f32x4 (__m128 __A)
2735 __v8sf __O;
2736 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2737 __O,
2738 (__mmask8) -1);
2742 extern __inline __m256
2743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2744 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2746 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2747 (__v8sf) __O,
2748 __M);
2751 extern __inline __m256
2752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2753 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2755 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2756 (__v8sf)
2757 _mm256_setzero_ps (),
2758 __M);
2761 extern __inline __m256i
2762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2763 _mm256_broadcast_i32x4 (__m128i __A)
2765 __v8si __O;
2766 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2767 __A, __O,
2768 (__mmask8) -1);
2772 extern __inline __m256i
2773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2774 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2776 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2777 __A,
2778 (__v8si)
2779 __O, __M);
2782 extern __inline __m256i
2783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2786 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2787 __A,
2788 (__v8si)
2789 _mm256_setzero_si256 (),
2790 __M);
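/* Masked sign extension of packed 8-, 16- and 32-bit integers to wider
   elements (VPMOVSXBD/BQ/WD/WQ/DQ).  */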
2793 extern __inline __m256i
2794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2795 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2797 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2798 (__v8si) __W,
2799 (__mmask8) __U);
2802 extern __inline __m256i
2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2804 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2806 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2807 (__v8si)
2808 _mm256_setzero_si256 (),
2809 (__mmask8) __U);
2812 extern __inline __m128i
2813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2814 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2816 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2817 (__v4si) __W,
2818 (__mmask8) __U);
2821 extern __inline __m128i
2822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2823 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2825 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2826 (__v4si)
2827 _mm_setzero_si128 (),
2828 (__mmask8) __U);
2831 extern __inline __m256i
2832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2835 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2836 (__v4di) __W,
2837 (__mmask8) __U);
2840 extern __inline __m256i
2841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2842 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2844 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2845 (__v4di)
2846 _mm256_setzero_si256 (),
2847 (__mmask8) __U);
2850 extern __inline __m128i
2851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2854 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2855 (__v2di) __W,
2856 (__mmask8) __U);
2859 extern __inline __m128i
2860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2861 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2863 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2864 (__v2di)
2865 _mm_setzero_si128 (),
2866 (__mmask8) __U);
2869 extern __inline __m256i
2870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2871 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2873 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2874 (__v8si) __W,
2875 (__mmask8) __U);
2878 extern __inline __m256i
2879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2880 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2882 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2883 (__v8si)
2884 _mm256_setzero_si256 (),
2885 (__mmask8) __U);
2888 extern __inline __m128i
2889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2890 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2892 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2893 (__v4si) __W,
2894 (__mmask8) __U);
2897 extern __inline __m128i
2898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2899 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2901 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2902 (__v4si)
2903 _mm_setzero_si128 (),
2904 (__mmask8) __U);
2907 extern __inline __m256i
2908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2909 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2911 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2912 (__v4di) __W,
2913 (__mmask8) __U);
2916 extern __inline __m256i
2917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2918 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2920 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2921 (__v4di)
2922 _mm256_setzero_si256 (),
2923 (__mmask8) __U);
2926 extern __inline __m128i
2927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2928 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2930 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2931 (__v2di) __W,
2932 (__mmask8) __U);
2935 extern __inline __m128i
2936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2937 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2939 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2940 (__v2di)
2941 _mm_setzero_si128 (),
2942 (__mmask8) __U);
2945 extern __inline __m256i
2946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2947 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2949 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2950 (__v4di) __W,
2951 (__mmask8) __U);
2954 extern __inline __m256i
2955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2956 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2958 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2959 (__v4di)
2960 _mm256_setzero_si256 (),
2961 (__mmask8) __U);
2964 extern __inline __m128i
2965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2966 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2968 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2969 (__v2di) __W,
2970 (__mmask8) __U);
2973 extern __inline __m128i
2974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2975 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2977 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2978 (__v2di)
2979 _mm_setzero_si128 (),
2980 (__mmask8) __U);
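/* Masked zero extension of packed 8-, 16- and 32-bit integers to wider
   elements (VPMOVZXBD/BQ/WD/WQ/DQ).  */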
2983 extern __inline __m256i
2984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2985 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2987 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2988 (__v8si) __W,
2989 (__mmask8) __U);
2992 extern __inline __m256i
2993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2996 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2997 (__v8si)
2998 _mm256_setzero_si256 (),
2999 (__mmask8) __U);
3002 extern __inline __m128i
3003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3004 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3006 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3007 (__v4si) __W,
3008 (__mmask8) __U);
3011 extern __inline __m128i
3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
3015 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3016 (__v4si)
3017 _mm_setzero_si128 (),
3018 (__mmask8) __U);
3021 extern __inline __m256i
3022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3023 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3025 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3026 (__v4di) __W,
3027 (__mmask8) __U);
3030 extern __inline __m256i
3031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3032 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3034 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3035 (__v4di)
3036 _mm256_setzero_si256 (),
3037 (__mmask8) __U);
3040 extern __inline __m128i
3041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3042 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3044 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3045 (__v2di) __W,
3046 (__mmask8) __U);
3049 extern __inline __m128i
3050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3051 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3053 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3054 (__v2di)
3055 _mm_setzero_si128 (),
3056 (__mmask8) __U);
3059 extern __inline __m256i
3060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3061 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3063 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3064 (__v8si) __W,
3065 (__mmask8) __U);
3068 extern __inline __m256i
3069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3070 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3072 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3073 (__v8si)
3074 _mm256_setzero_si256 (),
3075 (__mmask8) __U);
3078 extern __inline __m128i
3079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3082 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3083 (__v4si) __W,
3084 (__mmask8) __U);
3087 extern __inline __m128i
3088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3091 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3092 (__v4si)
3093 _mm_setzero_si128 (),
3094 (__mmask8) __U);
3097 extern __inline __m256i
3098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3101 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3102 (__v4di) __W,
3103 (__mmask8) __U);
3106 extern __inline __m256i
3107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3108 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3110 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3111 (__v4di)
3112 _mm256_setzero_si256 (),
3113 (__mmask8) __U);
3116 extern __inline __m128i
3117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3118 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3120 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3121 (__v2di) __W,
3122 (__mmask8) __U);
3125 extern __inline __m128i
3126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3127 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3129 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3130 (__v2di)
3131 _mm_setzero_si128 (),
3132 (__mmask8) __U);
3135 extern __inline __m256i
3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3139 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3140 (__v4di) __W,
3141 (__mmask8) __U);
3144 extern __inline __m256i
3145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3148 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3149 (__v4di)
3150 _mm256_setzero_si256 (),
3151 (__mmask8) __U);
3154 extern __inline __m128i
3155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3156 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3158 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3159 (__v2di) __W,
3160 (__mmask8) __U);
3163 extern __inline __m128i
3164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3165 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3167 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3168 (__v2di)
3169 _mm_setzero_si128 (),
3170 (__mmask8) __U);
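/* Approximate reciprocal with relative error less than 2^-14
   (VRCP14PS/PD).  */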
3173 extern __inline __m256d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm256_rcp14_pd (__m256d __A)
3177 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3178 (__v4df)
3179 _mm256_setzero_pd (),
3180 (__mmask8) -1);
3183 extern __inline __m256d
3184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3187 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3188 (__v4df) __W,
3189 (__mmask8) __U);
3192 extern __inline __m256d
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3196 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3197 (__v4df)
3198 _mm256_setzero_pd (),
3199 (__mmask8) __U);
3202 extern __inline __m128d
3203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3204 _mm_rcp14_pd (__m128d __A)
3206 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3207 (__v2df)
3208 _mm_setzero_pd (),
3209 (__mmask8) -1);
3212 extern __inline __m128d
3213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3214 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3216 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3217 (__v2df) __W,
3218 (__mmask8) __U);
3221 extern __inline __m128d
3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3225 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3226 (__v2df)
3227 _mm_setzero_pd (),
3228 (__mmask8) __U);
3231 extern __inline __m256
3232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3233 _mm256_rcp14_ps (__m256 __A)
3235 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3236 (__v8sf)
3237 _mm256_setzero_ps (),
3238 (__mmask8) -1);
3241 extern __inline __m256
3242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3243 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3245 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3246 (__v8sf) __W,
3247 (__mmask8) __U);
3250 extern __inline __m256
3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3252 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3254 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3255 (__v8sf)
3256 _mm256_setzero_ps (),
3257 (__mmask8) __U);
3260 extern __inline __m128
3261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3262 _mm_rcp14_ps (__m128 __A)
3264 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3265 (__v4sf)
3266 _mm_setzero_ps (),
3267 (__mmask8) -1);
3270 extern __inline __m128
3271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3272 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3274 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3275 (__v4sf) __W,
3276 (__mmask8) __U);
3279 extern __inline __m128
3280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3281 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3283 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3284 (__v4sf)
3285 _mm_setzero_ps (),
3286 (__mmask8) __U);
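/* Approximate reciprocal square root with relative error less than 2^-14
   (VRSQRT14PS/PD).  */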
3289 extern __inline __m256d
3290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3291 _mm256_rsqrt14_pd (__m256d __A)
3293 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3294 (__v4df)
3295 _mm256_setzero_pd (),
3296 (__mmask8) -1);
3299 extern __inline __m256d
3300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3301 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3303 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3304 (__v4df) __W,
3305 (__mmask8) __U);
3308 extern __inline __m256d
3309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3310 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3312 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3313 (__v4df)
3314 _mm256_setzero_pd (),
3315 (__mmask8) __U);
3318 extern __inline __m128d
3319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320 _mm_rsqrt14_pd (__m128d __A)
3322 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3323 (__v2df)
3324 _mm_setzero_pd (),
3325 (__mmask8) -1);
3328 extern __inline __m128d
3329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3330 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3332 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3333 (__v2df) __W,
3334 (__mmask8) __U);
3337 extern __inline __m128d
3338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3339 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3341 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3342 (__v2df)
3343 _mm_setzero_pd (),
3344 (__mmask8) __U);
3347 extern __inline __m256
3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349 _mm256_rsqrt14_ps (__m256 __A)
3351 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3352 (__v8sf)
3353 _mm256_setzero_ps (),
3354 (__mmask8) -1);
3357 extern __inline __m256
3358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3359 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3361 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3362 (__v8sf) __W,
3363 (__mmask8) __U);
3366 extern __inline __m256
3367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3368 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3370 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3371 (__v8sf)
3372 _mm256_setzero_ps (),
3373 (__mmask8) __U);
3376 extern __inline __m128
3377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3378 _mm_rsqrt14_ps (__m128 __A)
3380 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3381 (__v4sf)
3382 _mm_setzero_ps (),
3383 (__mmask8) -1);
3386 extern __inline __m128
3387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3388 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3390 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3391 (__v4sf) __W,
3392 (__mmask8) __U);
3395 extern __inline __m128
3396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3397 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3399 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3400 (__v4sf)
3401 _mm_setzero_ps (),
3402 (__mmask8) __U);
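/* Masked square root (VSQRTPS/PD).  */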
3405 extern __inline __m256d
3406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3407 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3409 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3410 (__v4df) __W,
3411 (__mmask8) __U);
3414 extern __inline __m256d
3415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3418 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3419 (__v4df)
3420 _mm256_setzero_pd (),
3421 (__mmask8) __U);
3424 extern __inline __m128d
3425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3426 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3428 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3429 (__v2df) __W,
3430 (__mmask8) __U);
3433 extern __inline __m128d
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3437 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3438 (__v2df)
3439 _mm_setzero_pd (),
3440 (__mmask8) __U);
3443 extern __inline __m256
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3447 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3448 (__v8sf) __W,
3449 (__mmask8) __U);
3452 extern __inline __m256
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3456 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3457 (__v8sf)
3458 _mm256_setzero_ps (),
3459 (__mmask8) __U);
3462 extern __inline __m128
3463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3466 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3467 (__v4sf) __W,
3468 (__mmask8) __U);
3471 extern __inline __m128
3472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3473 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3475 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3476 (__v4sf)
3477 _mm_setzero_ps (),
3478 (__mmask8) __U);
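/* Masked packed 32-bit and 64-bit integer add and subtract
   (VPADDD/Q, VPSUBD/Q).  */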
3481 extern __inline __m256i
3482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3483 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3484 __m256i __B)
3486 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3487 (__v8si) __B,
3488 (__v8si) __W,
3489 (__mmask8) __U);
3492 extern __inline __m256i
3493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3494 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3496 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3497 (__v8si) __B,
3498 (__v8si)
3499 _mm256_setzero_si256 (),
3500 (__mmask8) __U);
3503 extern __inline __m256i
3504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3505 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3506 __m256i __B)
3508 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3509 (__v4di) __B,
3510 (__v4di) __W,
3511 (__mmask8) __U);
3514 extern __inline __m256i
3515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3516 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3518 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3519 (__v4di) __B,
3520 (__v4di)
3521 _mm256_setzero_si256 (),
3522 (__mmask8) __U);
3525 extern __inline __m256i
3526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3527 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3528 __m256i __B)
3530 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3531 (__v8si) __B,
3532 (__v8si) __W,
3533 (__mmask8) __U);
3536 extern __inline __m256i
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3540 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3541 (__v8si) __B,
3542 (__v8si)
3543 _mm256_setzero_si256 (),
3544 (__mmask8) __U);
3547 extern __inline __m256i
3548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3549 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3550 __m256i __B)
3552 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3553 (__v4di) __B,
3554 (__v4di) __W,
3555 (__mmask8) __U);
3558 extern __inline __m256i
3559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3562 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3563 (__v4di) __B,
3564 (__v4di)
3565 _mm256_setzero_si256 (),
3566 (__mmask8) __U);
3569 extern __inline __m128i
3570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3572 __m128i __B)
3574 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3575 (__v4si) __B,
3576 (__v4si) __W,
3577 (__mmask8) __U);
3580 extern __inline __m128i
3581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3582 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3584 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3585 (__v4si) __B,
3586 (__v4si)
3587 _mm_setzero_si128 (),
3588 (__mmask8) __U);
3591 extern __inline __m128i
3592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3593 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3594 __m128i __B)
3596 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3597 (__v2di) __B,
3598 (__v2di) __W,
3599 (__mmask8) __U);
3602 extern __inline __m128i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3606 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3607 (__v2di) __B,
3608 (__v2di)
3609 _mm_setzero_si128 (),
3610 (__mmask8) __U);
3613 extern __inline __m128i
3614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3615 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3616 __m128i __B)
3618 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3619 (__v4si) __B,
3620 (__v4si) __W,
3621 (__mmask8) __U);
3624 extern __inline __m128i
3625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3626 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3628 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3629 (__v4si) __B,
3630 (__v4si)
3631 _mm_setzero_si128 (),
3632 (__mmask8) __U);
3635 extern __inline __m128i
3636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3637 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3638 __m128i __B)
3640 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3641 (__v2di) __B,
3642 (__v2di) __W,
3643 (__mmask8) __U);
3646 extern __inline __m128i
3647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3648 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3650 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3651 (__v2di) __B,
3652 (__v2di)
3653 _mm_setzero_si128 (),
3654 (__mmask8) __U);
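/* Extract the exponent of each element as a floating-point value
   (VGETEXPPS/PD).  */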
3657 extern __inline __m256
3658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3659 _mm256_getexp_ps (__m256 __A)
3661 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3662 (__v8sf)
3663 _mm256_setzero_ps (),
3664 (__mmask8) -1);
3667 extern __inline __m256
3668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3669 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3671 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3672 (__v8sf) __W,
3673 (__mmask8) __U);
3676 extern __inline __m256
3677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3678 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3680 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3681 (__v8sf)
3682 _mm256_setzero_ps (),
3683 (__mmask8) __U);
3686 extern __inline __m256d
3687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3688 _mm256_getexp_pd (__m256d __A)
3690 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3691 (__v4df)
3692 _mm256_setzero_pd (),
3693 (__mmask8) -1);
3696 extern __inline __m256d
3697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3698 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3700 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3701 (__v4df) __W,
3702 (__mmask8) __U);
3705 extern __inline __m256d
3706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3707 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3709 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3710 (__v4df)
3711 _mm256_setzero_pd (),
3712 (__mmask8) __U);
3715 extern __inline __m128
3716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3717 _mm_getexp_ps (__m128 __A)
3719 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3720 (__v4sf)
3721 _mm_setzero_ps (),
3722 (__mmask8) -1);
3725 extern __inline __m128
3726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3727 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3729 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3730 (__v4sf) __W,
3731 (__mmask8) __U);
3734 extern __inline __m128
3735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3736 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3738 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3739 (__v4sf)
3740 _mm_setzero_ps (),
3741 (__mmask8) __U);
3744 extern __inline __m128d
3745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3746 _mm_getexp_pd (__m128d __A)
3748 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3749 (__v2df)
3750 _mm_setzero_pd (),
3751 (__mmask8) -1);
3754 extern __inline __m128d
3755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3756 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3758 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3759 (__v2df) __W,
3760 (__mmask8) __U);
3763 extern __inline __m128d
3764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3765 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3767 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3768 (__v2df)
3769 _mm_setzero_pd (),
3770 (__mmask8) __U);
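/* Masked logical right shift by the shift count held in the low 64 bits
   of __B (VPSRLD/Q).  */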
3773 extern __inline __m256i
3774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3776 __m128i __B)
3778 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3779 (__v4si) __B,
3780 (__v8si) __W,
3781 (__mmask8) __U);
3784 extern __inline __m256i
3785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3786 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3788 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3789 (__v4si) __B,
3790 (__v8si)
3791 _mm256_setzero_si256 (),
3792 (__mmask8) __U);
3795 extern __inline __m128i
3796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3797 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3798 __m128i __B)
3800 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3801 (__v4si) __B,
3802 (__v4si) __W,
3803 (__mmask8) __U);
3806 extern __inline __m128i
3807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3808 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3810 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3811 (__v4si) __B,
3812 (__v4si)
3813 _mm_setzero_si128 (),
3814 (__mmask8) __U);
3817 extern __inline __m256i
3818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3819 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3820 __m128i __B)
3822 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3823 (__v2di) __B,
3824 (__v4di) __W,
3825 (__mmask8) __U);
3828 extern __inline __m256i
3829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3830 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3832 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3833 (__v2di) __B,
3834 (__v4di)
3835 _mm256_setzero_si256 (),
3836 (__mmask8) __U);
3839 extern __inline __m128i
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3842 __m128i __B)
3844 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3845 (__v2di) __B,
3846 (__v2di) __W,
3847 (__mmask8) __U);
3850 extern __inline __m128i
3851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3852 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3854 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3855 (__v2di) __B,
3856 (__v2di)
3857 _mm_setzero_di (),
3858 (__mmask8) __U);
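/* Usage sketch for the masked logical shifts above (values illustrative):
   the shift count is taken from the low 64 bits of __B and applied to every
   element of __A.

     __m128i a   = _mm_set_epi32 (0x80, 0x40, 0x20, 0x10);  // e0..e3
     __m128i cnt = _mm_cvtsi32_si128 (4);
     __m128i w   = _mm_set1_epi32 (-1);
     __m128i r   = _mm_mask_srl_epi32 (w, 0x3, a, cnt);
     // e0 = 0x10 >> 4 = 1, e1 = 0x20 >> 4 = 2, e2 and e3 keep -1 from w
   */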
3861 extern __inline __m256i
3862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3863 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3864 __m256i __B)
3866 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3867 (__v8si) __B,
3868 (__v8si) __W,
3869 (__mmask8) __U);
3872 extern __inline __m256i
3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3876 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3877 (__v8si) __B,
3878 (__v8si)
3879 _mm256_setzero_si256 (),
3880 (__mmask8) __U);
3883 extern __inline __m256d
3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885 _mm256_scalef_pd (__m256d __A, __m256d __B)
3887 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3888 (__v4df) __B,
3889 (__v4df)
3890 _mm256_setzero_pd (),
3891 (__mmask8) -1);
3894 extern __inline __m256d
3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3897 __m256d __B)
3899 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3900 (__v4df) __B,
3901 (__v4df) __W,
3902 (__mmask8) __U);
3905 extern __inline __m256d
3906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3907 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3909 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3910 (__v4df) __B,
3911 (__v4df)
3912 _mm256_setzero_pd (),
3913 (__mmask8) __U);
3916 extern __inline __m256
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm256_scalef_ps (__m256 __A, __m256 __B)
3920 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3921 (__v8sf) __B,
3922 (__v8sf)
3923 _mm256_setzero_ps (),
3924 (__mmask8) -1);
3927 extern __inline __m256
3928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3929 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3930 __m256 __B)
3932 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3933 (__v8sf) __B,
3934 (__v8sf) __W,
3935 (__mmask8) __U);
3938 extern __inline __m256
3939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3940 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3942 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3943 (__v8sf) __B,
3944 (__v8sf)
3945 _mm256_setzero_ps (),
3946 (__mmask8) __U);
3949 extern __inline __m128d
3950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3951 _mm_scalef_pd (__m128d __A, __m128d __B)
3953 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3954 (__v2df) __B,
3955 (__v2df)
3956 _mm_setzero_pd (),
3957 (__mmask8) -1);
3960 extern __inline __m128d
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3963 __m128d __B)
3965 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3966 (__v2df) __B,
3967 (__v2df) __W,
3968 (__mmask8) __U);
3971 extern __inline __m128d
3972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3973 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3975 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3976 (__v2df) __B,
3977 (__v2df)
3978 _mm_setzero_pd (),
3979 (__mmask8) __U);
3982 extern __inline __m128
3983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984 _mm_scalef_ps (__m128 __A, __m128 __B)
3986 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3987 (__v4sf) __B,
3988 (__v4sf)
3989 _mm_setzero_ps (),
3990 (__mmask8) -1);
3993 extern __inline __m128
3994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3995 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3997 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3998 (__v4sf) __B,
3999 (__v4sf) __W,
4000 (__mmask8) __U);
4003 extern __inline __m128
4004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
4007 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4008 (__v4sf) __B,
4009 (__v4sf)
4010 _mm_setzero_ps (),
4011 (__mmask8) __U);
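/* Usage sketch for the scalef family above (values illustrative, default
   rounding assumed): each element computes __A * 2^floor (__B), i.e. an
   exponent adjustment that leaves the mantissa untouched.

     __m128d a = _mm_set_pd (3.0, 1.5);    // e0 = 1.5, e1 = 3.0
     __m128d b = _mm_set_pd (-1.0, 4.0);   // e0 = 4.0, e1 = -1.0
     __m128d r = _mm_scalef_pd (a, b);     // { 1.5 * 2^4, 3.0 * 2^-1 } = { 24.0, 1.5 }
   */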
4014 extern __inline __m256d
4015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4016 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4017 __m256d __C)
4019 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4020 (__v4df) __B,
4021 (__v4df) __C,
4022 (__mmask8) __U);
4025 extern __inline __m256d
4026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4027 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4028 __mmask8 __U)
4030 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4031 (__v4df) __B,
4032 (__v4df) __C,
4033 (__mmask8) __U);
4036 extern __inline __m256d
4037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4038 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4039 __m256d __C)
4041 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4042 (__v4df) __B,
4043 (__v4df) __C,
4044 (__mmask8) __U);
4047 extern __inline __m128d
4048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4049 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4051 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4052 (__v2df) __B,
4053 (__v2df) __C,
4054 (__mmask8) __U);
4057 extern __inline __m128d
4058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4059 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4060 __mmask8 __U)
4062 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4063 (__v2df) __B,
4064 (__v2df) __C,
4065 (__mmask8) __U);
4068 extern __inline __m128d
4069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4070 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4071 __m128d __C)
4073 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4074 (__v2df) __B,
4075 (__v2df) __C,
4076 (__mmask8) __U);
4079 extern __inline __m256
4080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4081 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4083 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4084 (__v8sf) __B,
4085 (__v8sf) __C,
4086 (__mmask8) __U);
4089 extern __inline __m256
4090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4091 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4092 __mmask8 __U)
4094 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4095 (__v8sf) __B,
4096 (__v8sf) __C,
4097 (__mmask8) __U);
4100 extern __inline __m256
4101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4102 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4103 __m256 __C)
4105 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4106 (__v8sf) __B,
4107 (__v8sf) __C,
4108 (__mmask8) __U);
4111 extern __inline __m128
4112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4113 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4115 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4116 (__v4sf) __B,
4117 (__v4sf) __C,
4118 (__mmask8) __U);
4121 extern __inline __m128
4122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4123 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4125 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4126 (__v4sf) __B,
4127 (__v4sf) __C,
4128 (__mmask8) __U);
4131 extern __inline __m128
4132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4133 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4135 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4136 (__v4sf) __B,
4137 (__v4sf) __C,
4138 (__mmask8) __U);
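/* The three masked fused multiply-add forms above differ only in which
   operand supplies the pass-through lanes; a per-element sketch:

     d0 = _mm_mask_fmadd_pd  (a, m, b, c);   // m ? a * b + c : a
     d1 = _mm_mask3_fmadd_pd (a, b, c, m);   // m ? a * b + c : c
     d2 = _mm_maskz_fmadd_pd (m, a, b, c);   // m ? a * b + c : 0.0

   The same pattern applies to the fmsub/fmaddsub/fmsubadd/fnmadd/fnmsub
   variants below.  */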
4141 extern __inline __m256d
4142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4144 __m256d __C)
4146 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4147 (__v4df) __B,
4148 -(__v4df) __C,
4149 (__mmask8) __U);
4152 extern __inline __m256d
4153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4154 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4155 __mmask8 __U)
4157 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4158 (__v4df) __B,
4159 (__v4df) __C,
4160 (__mmask8) __U);
4163 extern __inline __m256d
4164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4165 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4166 __m256d __C)
4168 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4169 (__v4df) __B,
4170 -(__v4df) __C,
4171 (__mmask8) __U);
4174 extern __inline __m128d
4175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4176 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4178 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4179 (__v2df) __B,
4180 -(__v2df) __C,
4181 (__mmask8) __U);
4184 extern __inline __m128d
4185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4186 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4187 __mmask8 __U)
4189 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4190 (__v2df) __B,
4191 (__v2df) __C,
4192 (__mmask8) __U);
4195 extern __inline __m128d
4196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4197 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4198 __m128d __C)
4200 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4201 (__v2df) __B,
4202 -(__v2df) __C,
4203 (__mmask8) __U);
4206 extern __inline __m256
4207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4208 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4210 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4211 (__v8sf) __B,
4212 -(__v8sf) __C,
4213 (__mmask8) __U);
4216 extern __inline __m256
4217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4218 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4219 __mmask8 __U)
4221 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4222 (__v8sf) __B,
4223 (__v8sf) __C,
4224 (__mmask8) __U);
4227 extern __inline __m256
4228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4229 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4230 __m256 __C)
4232 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4233 (__v8sf) __B,
4234 -(__v8sf) __C,
4235 (__mmask8) __U);
4238 extern __inline __m128
4239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4240 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4242 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4243 (__v4sf) __B,
4244 -(__v4sf) __C,
4245 (__mmask8) __U);
4248 extern __inline __m128
4249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4250 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4252 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4253 (__v4sf) __B,
4254 (__v4sf) __C,
4255 (__mmask8) __U);
4258 extern __inline __m128
4259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4260 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4262 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4263 (__v4sf) __B,
4264 -(__v4sf) __C,
4265 (__mmask8) __U);
4268 extern __inline __m256d
4269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4270 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4271 __m256d __C)
4273 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4274 (__v4df) __B,
4275 (__v4df) __C,
4276 (__mmask8) __U);
4279 extern __inline __m256d
4280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4281 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4282 __mmask8 __U)
4284 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4285 (__v4df) __B,
4286 (__v4df) __C,
4287 (__mmask8)
4288 __U);
4291 extern __inline __m256d
4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4294 __m256d __C)
4296 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4297 (__v4df) __B,
4298 (__v4df) __C,
4299 (__mmask8)
4300 __U);
4303 extern __inline __m128d
4304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4305 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4306 __m128d __C)
4308 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4309 (__v2df) __B,
4310 (__v2df) __C,
4311 (__mmask8) __U);
4314 extern __inline __m128d
4315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4317 __mmask8 __U)
4319 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4320 (__v2df) __B,
4321 (__v2df) __C,
4322 (__mmask8)
4323 __U);
4326 extern __inline __m128d
4327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4328 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4329 __m128d __C)
4331 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4332 (__v2df) __B,
4333 (__v2df) __C,
4334 (__mmask8)
4335 __U);
4338 extern __inline __m256
4339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4340 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4341 __m256 __C)
4343 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4344 (__v8sf) __B,
4345 (__v8sf) __C,
4346 (__mmask8) __U);
4349 extern __inline __m256
4350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4351 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4352 __mmask8 __U)
4354 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4355 (__v8sf) __B,
4356 (__v8sf) __C,
4357 (__mmask8) __U);
4360 extern __inline __m256
4361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4362 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4363 __m256 __C)
4365 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4366 (__v8sf) __B,
4367 (__v8sf) __C,
4368 (__mmask8) __U);
4371 extern __inline __m128
4372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4373 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4375 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4376 (__v4sf) __B,
4377 (__v4sf) __C,
4378 (__mmask8) __U);
4381 extern __inline __m128
4382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4383 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4384 __mmask8 __U)
4386 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4387 (__v4sf) __B,
4388 (__v4sf) __C,
4389 (__mmask8) __U);
4392 extern __inline __m128
4393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4394 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4395 __m128 __C)
4397 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4398 (__v4sf) __B,
4399 (__v4sf) __C,
4400 (__mmask8) __U);
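/* fmaddsub alternates the final operation per lane: even-indexed elements
   compute a * b - c, odd-indexed elements a * b + c (fmsubadd, below, swaps
   the two).  A sketch with illustrative values:

     __m128d a = _mm_set1_pd (2.0), b = _mm_set1_pd (3.0), c = _mm_set1_pd (1.0);
     __m128d r = _mm_maskz_fmaddsub_pd (0x3, a, b, c);  // { 2*3-1, 2*3+1 } = { 5.0, 7.0 }
   */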
4403 extern __inline __m256d
4404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4405 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4406 __m256d __C)
4408 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4409 (__v4df) __B,
4410 -(__v4df) __C,
4411 (__mmask8) __U);
4414 extern __inline __m256d
4415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4416 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4417 __mmask8 __U)
4419 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4420 (__v4df) __B,
4421 (__v4df) __C,
4422 (__mmask8)
4423 __U);
4426 extern __inline __m256d
4427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4429 __m256d __C)
4431 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4432 (__v4df) __B,
4433 -(__v4df) __C,
4434 (__mmask8)
4435 __U);
4438 extern __inline __m128d
4439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4440 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4441 __m128d __C)
4443 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4444 (__v2df) __B,
4445 -(__v2df) __C,
4446 (__mmask8) __U);
4449 extern __inline __m128d
4450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4451 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4452 __mmask8 __U)
4454 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4455 (__v2df) __B,
4456 (__v2df) __C,
4457 (__mmask8)
4458 __U);
4461 extern __inline __m128d
4462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4463 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4464 __m128d __C)
4466 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4467 (__v2df) __B,
4468 -(__v2df) __C,
4469 (__mmask8)
4470 __U);
4473 extern __inline __m256
4474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4475 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4476 __m256 __C)
4478 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4479 (__v8sf) __B,
4480 -(__v8sf) __C,
4481 (__mmask8) __U);
4484 extern __inline __m256
4485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4486 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4487 __mmask8 __U)
4489 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4490 (__v8sf) __B,
4491 (__v8sf) __C,
4492 (__mmask8) __U);
4495 extern __inline __m256
4496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4497 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4498 __m256 __C)
4500 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4501 (__v8sf) __B,
4502 -(__v8sf) __C,
4503 (__mmask8) __U);
4506 extern __inline __m128
4507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4508 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4510 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4511 (__v4sf) __B,
4512 -(__v4sf) __C,
4513 (__mmask8) __U);
4516 extern __inline __m128
4517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4518 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4519 __mmask8 __U)
4521 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4522 (__v4sf) __B,
4523 (__v4sf) __C,
4524 (__mmask8) __U);
4527 extern __inline __m128
4528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4529 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4530 __m128 __C)
4532 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4533 (__v4sf) __B,
4534 -(__v4sf) __C,
4535 (__mmask8) __U);
4538 extern __inline __m256d
4539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4540 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4541 __m256d __C)
4543 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4544 (__v4df) __B,
4545 (__v4df) __C,
4546 (__mmask8) __U);
4549 extern __inline __m256d
4550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4551 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4552 __mmask8 __U)
4554 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4555 (__v4df) __B,
4556 (__v4df) __C,
4557 (__mmask8) __U);
4560 extern __inline __m256d
4561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4562 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4563 __m256d __C)
4565 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4566 (__v4df) __B,
4567 (__v4df) __C,
4568 (__mmask8) __U);
4571 extern __inline __m128d
4572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4574 __m128d __C)
4576 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4577 (__v2df) __B,
4578 (__v2df) __C,
4579 (__mmask8) __U);
4582 extern __inline __m128d
4583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4584 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4585 __mmask8 __U)
4587 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4588 (__v2df) __B,
4589 (__v2df) __C,
4590 (__mmask8) __U);
4593 extern __inline __m128d
4594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4595 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4596 __m128d __C)
4598 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4599 (__v2df) __B,
4600 (__v2df) __C,
4601 (__mmask8) __U);
4604 extern __inline __m256
4605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4606 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4607 __m256 __C)
4609 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4610 (__v8sf) __B,
4611 (__v8sf) __C,
4612 (__mmask8) __U);
4615 extern __inline __m256
4616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4617 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4618 __mmask8 __U)
4620 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4621 (__v8sf) __B,
4622 (__v8sf) __C,
4623 (__mmask8) __U);
4626 extern __inline __m256
4627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4628 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4629 __m256 __C)
4631 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4632 (__v8sf) __B,
4633 (__v8sf) __C,
4634 (__mmask8) __U);
4637 extern __inline __m128
4638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4639 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4641 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4642 (__v4sf) __B,
4643 (__v4sf) __C,
4644 (__mmask8) __U);
4647 extern __inline __m128
4648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4649 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4651 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4652 (__v4sf) __B,
4653 (__v4sf) __C,
4654 (__mmask8) __U);
4657 extern __inline __m128
4658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4659 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4661 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4662 (__v4sf) __B,
4663 (__v4sf) __C,
4664 (__mmask8) __U);
4667 extern __inline __m256d
4668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4669 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4670 __m256d __C)
4672 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4673 (__v4df) __B,
4674 (__v4df) __C,
4675 (__mmask8) __U);
4678 extern __inline __m256d
4679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4680 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4681 __mmask8 __U)
4683 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4684 (__v4df) __B,
4685 (__v4df) __C,
4686 (__mmask8) __U);
4689 extern __inline __m256d
4690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4692 __m256d __C)
4694 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4695 (__v4df) __B,
4696 -(__v4df) __C,
4697 (__mmask8) __U);
4700 extern __inline __m128d
4701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4703 __m128d __C)
4705 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4706 (__v2df) __B,
4707 (__v2df) __C,
4708 (__mmask8) __U);
4711 extern __inline __m128d
4712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4713 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4714 __mmask8 __U)
4716 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4717 (__v2df) __B,
4718 (__v2df) __C,
4719 (__mmask8) __U);
4722 extern __inline __m128d
4723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4724 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4725 __m128d __C)
4727 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4728 (__v2df) __B,
4729 -(__v2df) __C,
4730 (__mmask8) __U);
4733 extern __inline __m256
4734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4735 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4736 __m256 __C)
4738 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4739 (__v8sf) __B,
4740 (__v8sf) __C,
4741 (__mmask8) __U);
4744 extern __inline __m256
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4747 __mmask8 __U)
4749 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4750 (__v8sf) __B,
4751 (__v8sf) __C,
4752 (__mmask8) __U);
4755 extern __inline __m256
4756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4757 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4758 __m256 __C)
4760 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4761 (__v8sf) __B,
4762 -(__v8sf) __C,
4763 (__mmask8) __U);
4766 extern __inline __m128
4767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4768 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4770 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4771 (__v4sf) __B,
4772 (__v4sf) __C,
4773 (__mmask8) __U);
4776 extern __inline __m128
4777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4780 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4781 (__v4sf) __B,
4782 (__v4sf) __C,
4783 (__mmask8) __U);
4786 extern __inline __m128
4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4790 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4791 (__v4sf) __B,
4792 -(__v4sf) __C,
4793 (__mmask8) __U);
4796 extern __inline __m128i
4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4799 __m128i __B)
4801 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4802 (__v4si) __B,
4803 (__v4si) __W,
4804 (__mmask8) __U);
4807 extern __inline __m128i
4808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4809 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4811 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4812 (__v4si) __B,
4813 (__v4si)
4814 _mm_setzero_si128 (),
4815 (__mmask8) __U);
4818 extern __inline __m256i
4819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4820 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4821 __m256i __B)
4823 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4824 (__v8si) __B,
4825 (__v8si) __W,
4826 (__mmask8) __U);
4829 extern __inline __m256i
4830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4831 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4833 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4834 (__v8si) __B,
4835 (__v8si)
4836 _mm256_setzero_si256 (),
4837 (__mmask8) __U);
4840 extern __inline __m128i
4841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4842 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4843 __m128i __B)
4845 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4846 (__v4si) __B,
4847 (__v4si) __W,
4848 (__mmask8) __U);
4851 extern __inline __m128i
4852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4853 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4855 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4856 (__v4si) __B,
4857 (__v4si)
4858 _mm_setzero_si128 (),
4859 (__mmask8) __U);
4862 extern __inline __m256i
4863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4864 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4865 __m256i __B)
4867 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4868 (__v8si) __B,
4869 (__v8si) __W,
4870 (__mmask8) __U);
4873 extern __inline __m256i
4874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4875 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4877 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4878 (__v8si) __B,
4879 (__v8si)
4880 _mm256_setzero_si256 (),
4881 (__mmask8) __U);
4884 extern __inline __m128i
4885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4888 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4889 (__v4si) __B,
4890 (__v4si) __W,
4891 (__mmask8) __U);
4894 extern __inline __m128i
4895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4898 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4899 (__v4si) __B,
4900 (__v4si)
4901 _mm_setzero_si128 (),
4902 (__mmask8) __U);
4905 extern __inline __m256i
4906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4907 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4908 __m256i __B)
4910 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4911 (__v8si) __B,
4912 (__v8si) __W,
4913 (__mmask8) __U);
4916 extern __inline __m256i
4917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4918 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4920 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4921 (__v8si) __B,
4922 (__v8si)
4923 _mm256_setzero_si256 (),
4924 (__mmask8) __U);
4927 extern __inline __m128i
4928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4929 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4930 __m128i __B)
4932 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4933 (__v4si) __B,
4934 (__v4si) __W,
4935 (__mmask8) __U);
4938 extern __inline __m128i
4939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4940 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4942 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4943 (__v4si) __B,
4944 (__v4si)
4945 _mm_setzero_si128 (),
4946 (__mmask8) __U);
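/* Usage sketch for the masked bitwise operations above (and/andnot/or/xor
   all follow the same pattern; values illustrative):

     __m128i a = _mm_set1_epi32 (0x0ff0);
     __m128i b = _mm_set1_epi32 (0x00ff);
     __m128i r = _mm_maskz_and_epi32 (0x9, a, b);
     // e0 and e3 = 0x0ff0 & 0x00ff = 0x00f0, e1 and e2 are zeroed
   */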
4949 extern __inline __m128
4950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4951 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4953 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4954 (__v4sf) __W,
4955 (__mmask8) __U);
4958 extern __inline __m128
4959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4960 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4962 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4963 (__v4sf)
4964 _mm_setzero_ps (),
4965 (__mmask8) __U);
4968 extern __inline __m128
4969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4970 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4972 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4973 (__v4sf) __W,
4974 (__mmask8) __U);
4977 extern __inline __m128
4978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4979 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4981 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4982 (__v4sf)
4983 _mm_setzero_ps (),
4984 (__mmask8) __U);
4987 extern __inline __m256i
4988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4989 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4991 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4992 (__v8si) __W,
4993 (__mmask8) __U);
4996 extern __inline __m256i
4997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4998 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
5000 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5001 (__v8si)
5002 _mm256_setzero_si256 (),
5003 (__mmask8) __U);
5006 extern __inline __m128i
5007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5008 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5010 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5011 (__v4si) __W,
5012 (__mmask8) __U);
5015 extern __inline __m128i
5016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5017 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5019 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5020 (__v4si)
5021 _mm_setzero_si128 (),
5022 (__mmask8) __U);
5025 extern __inline __m256i
5026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027 _mm256_cvtps_epu32 (__m256 __A)
5029 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5030 (__v8si)
5031 _mm256_setzero_si256 (),
5032 (__mmask8) -1);
5035 extern __inline __m256i
5036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5037 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5039 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5040 (__v8si) __W,
5041 (__mmask8) __U);
5044 extern __inline __m256i
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5048 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5049 (__v8si)
5050 _mm256_setzero_si256 (),
5051 (__mmask8) __U);
5054 extern __inline __m128i
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm_cvtps_epu32 (__m128 __A)
5058 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5059 (__v4si)
5060 _mm_setzero_si128 (),
5061 (__mmask8) -1);
5064 extern __inline __m128i
5065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5066 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5068 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5069 (__v4si) __W,
5070 (__mmask8) __U);
5073 extern __inline __m128i
5074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5075 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5077 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5078 (__v4si)
5079 _mm_setzero_si128 (),
5080 (__mmask8) __U);
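/* Usage sketch for the masked conversions above (values illustrative): the
   conversion honours the current MXCSR rounding mode (round-to-nearest-even
   by default), and the mask selects which destination elements are written.

     __m128  a = _mm_set_ps (4.5f, 3.5f, 2.5f, 1.5f);  // e0..e3
     __m128i r = _mm_maskz_cvtps_epi32 (0x5, a);
     // e0 = 2, e2 = 4 (ties rounded to even), e1 and e3 zeroed
   */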
5083 extern __inline __m256d
5084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5085 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5087 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5088 (__v4df) __W,
5089 (__mmask8) __U);
5092 extern __inline __m256d
5093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5094 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5096 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5097 (__v4df)
5098 _mm256_setzero_pd (),
5099 (__mmask8) __U);
5102 extern __inline __m128d
5103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5104 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5106 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5107 (__v2df) __W,
5108 (__mmask8) __U);
5111 extern __inline __m128d
5112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5113 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5115 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5116 (__v2df)
5117 _mm_setzero_pd (),
5118 (__mmask8) __U);
5121 extern __inline __m256
5122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5123 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5125 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5126 (__v8sf) __W,
5127 (__mmask8) __U);
5130 extern __inline __m256
5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5134 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5135 (__v8sf)
5136 _mm256_setzero_ps (),
5137 (__mmask8) __U);
5140 extern __inline __m128
5141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5142 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5144 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5145 (__v4sf) __W,
5146 (__mmask8) __U);
5149 extern __inline __m128
5150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5151 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5153 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5154 (__v4sf)
5155 _mm_setzero_ps (),
5156 (__mmask8) __U);
5159 extern __inline __m256
5160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5161 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5163 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5164 (__v8sf) __W,
5165 (__mmask8) __U);
5168 extern __inline __m256
5169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5170 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5172 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5173 (__v8sf)
5174 _mm256_setzero_ps (),
5175 (__mmask8) __U);
5178 extern __inline __m128
5179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5180 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5182 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5183 (__v4sf) __W,
5184 (__mmask8) __U);
5187 extern __inline __m128
5188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5189 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5191 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5192 (__v4sf)
5193 _mm_setzero_ps (),
5194 (__mmask8) __U);
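/* Usage sketch for the masked duplication shuffles above: movedup copies the
   low double of each 128-bit lane, movehdup the odd-indexed floats, moveldup
   the even-indexed floats; the VL forms only add merge/zero masking.

     __m128 a = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);   // e0..e3 = 1,2,3,4
     __m128 r = _mm_maskz_movehdup_ps (0xf, a);         // { 2, 2, 4, 4 }
   */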
5197 extern __inline __m128i
5198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5199 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5200 __m128i __B)
5202 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5203 (__v4si) __B,
5204 (__v4si) __W,
5205 (__mmask8) __U);
5208 extern __inline __m128i
5209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5210 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5212 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5213 (__v4si) __B,
5214 (__v4si)
5215 _mm_setzero_si128 (),
5216 (__mmask8) __U);
5219 extern __inline __m256i
5220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5221 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5222 __m256i __B)
5224 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5225 (__v8si) __B,
5226 (__v8si) __W,
5227 (__mmask8) __U);
5230 extern __inline __m256i
5231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5232 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5234 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5235 (__v8si) __B,
5236 (__v8si)
5237 _mm256_setzero_si256 (),
5238 (__mmask8) __U);
5241 extern __inline __m128i
5242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5243 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5244 __m128i __B)
5246 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5247 (__v2di) __B,
5248 (__v2di) __W,
5249 (__mmask8) __U);
5252 extern __inline __m128i
5253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5254 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5256 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5257 (__v2di) __B,
5258 (__v2di)
5259 _mm_setzero_di (),
5260 (__mmask8) __U);
5263 extern __inline __m256i
5264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5265 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5266 __m256i __B)
5268 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5269 (__v4di) __B,
5270 (__v4di) __W,
5271 (__mmask8) __U);
5274 extern __inline __m256i
5275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5276 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5278 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5279 (__v4di) __B,
5280 (__v4di)
5281 _mm256_setzero_si256 (),
5282 (__mmask8) __U);
5285 extern __inline __m128i
5286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5287 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5288 __m128i __B)
5290 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5291 (__v4si) __B,
5292 (__v4si) __W,
5293 (__mmask8) __U);
5296 extern __inline __m128i
5297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5298 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5300 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5301 (__v4si) __B,
5302 (__v4si)
5303 _mm_setzero_si128 (),
5304 (__mmask8) __U);
5307 extern __inline __m256i
5308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5309 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5310 __m256i __B)
5312 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5313 (__v8si) __B,
5314 (__v8si) __W,
5315 (__mmask8) __U);
5318 extern __inline __m256i
5319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5320 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5322 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5323 (__v8si) __B,
5324 (__v8si)
5325 _mm256_setzero_si256 (),
5326 (__mmask8) __U);
5329 extern __inline __m128i
5330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5331 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5332 __m128i __B)
5334 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5335 (__v2di) __B,
5336 (__v2di) __W,
5337 (__mmask8) __U);
5340 extern __inline __m128i
5341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5342 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5344 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5345 (__v2di) __B,
5346 (__v2di)
5347 _mm_setzero_di (),
5348 (__mmask8) __U);
5351 extern __inline __m256i
5352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5353 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5354 __m256i __B)
5356 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5357 (__v4di) __B,
5358 (__v4di) __W,
5359 (__mmask8) __U);
5362 extern __inline __m256i
5363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5364 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5366 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5367 (__v4di) __B,
5368 (__v4di)
5369 _mm256_setzero_si256 (),
5370 (__mmask8) __U);
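/* Usage sketch for the masked unpack operations above: unpacklo interleaves
   the low halves of __A and __B, unpackhi the high halves (per 128-bit lane
   for the 256-bit forms); the interleaved result is then masked as usual.

     __m128i a = _mm_set_epi32 (3, 2, 1, 0);   // e0..e3 = 0,1,2,3
     __m128i b = _mm_set_epi32 (7, 6, 5, 4);   // e0..e3 = 4,5,6,7
     __m128i r = _mm_maskz_unpacklo_epi32 (0xf, a, b);  // { 0, 4, 1, 5 }
   */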
5373 extern __inline __mmask8
5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5377 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5378 (__v4si) __B,
5379 (__mmask8) -1);
5382 extern __inline __mmask8
5383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5384 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5386 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5387 (__v4si) __B, __U);
5390 extern __inline __mmask8
5391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5394 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5395 (__v8si) __B,
5396 (__mmask8) -1);
5399 extern __inline __mmask8
5400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5401 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5403 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5404 (__v8si) __B, __U);
5407 extern __inline __mmask8
5408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5409 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5411 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5412 (__v2di) __B,
5413 (__mmask8) -1);
5416 extern __inline __mmask8
5417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5420 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5421 (__v2di) __B, __U);
5424 extern __inline __mmask8
5425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5426 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5428 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5429 (__v4di) __B,
5430 (__mmask8) -1);
5433 extern __inline __mmask8
5434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5437 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5438 (__v4di) __B, __U);
5441 extern __inline __mmask8
5442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5443 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5445 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5446 (__v4si) __B,
5447 (__mmask8) -1);
5450 extern __inline __mmask8
5451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5452 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5454 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5455 (__v4si) __B, __U);
5458 extern __inline __mmask8
5459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5460 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5462 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5463 (__v8si) __B,
5464 (__mmask8) -1);
5467 extern __inline __mmask8
5468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5469 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5471 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5472 (__v8si) __B, __U);
5475 extern __inline __mmask8
5476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5477 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5479 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5480 (__v2di) __B,
5481 (__mmask8) -1);
5484 extern __inline __mmask8
5485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5486 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5488 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5489 (__v2di) __B, __U);
5492 extern __inline __mmask8
5493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5494 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5496 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5497 (__v4di) __B,
5498 (__mmask8) -1);
5501 extern __inline __mmask8
5502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5503 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5505 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5506 (__v4di) __B, __U);
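/* The comparisons above return a bit-mask rather than a vector; the _mask_
   forms AND the comparison result with the incoming mask.  A sketch with
   illustrative values:

     __m128i a = _mm_set_epi32 (3, 2, 1, 0);   // e0..e3 = 0,1,2,3
     __m128i b = _mm_set_epi32 (0, 2, 0, 2);   // e0..e3 = 2,0,2,0
     __mmask8 m = _mm_cmpeq_epi32_mask (a, b);            // 0x4 (only e2 equal)
     __mmask8 n = _mm_mask_cmpgt_epi32_mask (0x3, a, b);  // (0xa & 0x3) = 0x2
   */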
5509 extern __inline __mmask8
5510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5511 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5513 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5514 (__v4si) __B,
5515 (__mmask8) -1);
5518 extern __inline __mmask8
5519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5520 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5522 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5523 (__v4si) __B, __U);
5526 extern __inline __mmask8
5527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5528 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5530 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5531 (__v8si) __B,
5532 (__mmask8) -1);
5535 extern __inline __mmask8
5536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5537 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5539 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5540 (__v8si) __B, __U);
5543 extern __inline __mmask8
5544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5545 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5547 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5548 (__v2di) __B,
5549 (__mmask8) -1);
5552 extern __inline __mmask8
5553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5554 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5556 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5557 (__v2di) __B, __U);
5560 extern __inline __mmask8
5561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5562 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5564 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5565 (__v4di) __B,
5566 (__mmask8) -1);
5569 extern __inline __mmask8
5570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5571 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5573 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5574 (__v4di) __B, __U);
5577 extern __inline __mmask8
5578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5579 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5581 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5582 (__v4si) __B,
5583 (__mmask8) -1);
5586 extern __inline __mmask8
5587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5588 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5590 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5591 (__v4si) __B, __U);
5594 extern __inline __mmask8
5595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5596 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5598 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5599 (__v8si) __B,
5600 (__mmask8) -1);
5603 extern __inline __mmask8
5604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5605 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5607 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5608 (__v8si) __B, __U);
5611 extern __inline __mmask8
5612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5615 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5616 (__v2di) __B,
5617 (__mmask8) -1);
5620 extern __inline __mmask8
5621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5622 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5624 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5625 (__v2di) __B, __U);
5628 extern __inline __mmask8
5629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5630 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5632 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5633 (__v4di) __B,
5634 (__mmask8) -1);
5637 extern __inline __mmask8
5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5641 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5642 (__v4di) __B, __U);
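/* test sets mask bit i when (__A_i & __B_i) is non-zero, testn when that AND
   is zero; the _mask_ forms additionally AND the result with __U.  A sketch
   with illustrative values:

     __m128i a = _mm_set_epi32 (0, 0xf0, 0x0f, 0xff);  // e0..e3 = 0xff,0x0f,0xf0,0
     __m128i b = _mm_set1_epi32 (0x0f);
     __mmask8 m = _mm_test_epi32_mask (a, b);    // 0x3
     __mmask8 n = _mm_testn_epi32_mask (a, b);   // 0xc
   */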
5645 extern __inline __m256d
5646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5647 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5649 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5650 (__v4df) __W,
5651 (__mmask8) __U);
5654 extern __inline __m256d
5655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5658 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5659 (__v4df)
5660 _mm256_setzero_pd (),
5661 (__mmask8) __U);
5664 extern __inline void
5665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5666 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5668 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5669 (__v4df) __A,
5670 (__mmask8) __U);
5673 extern __inline __m128d
5674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5675 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5677 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5678 (__v2df) __W,
5679 (__mmask8) __U);
5682 extern __inline __m128d
5683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5684 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5686 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5687 (__v2df)
5688 _mm_setzero_pd (),
5689 (__mmask8) __U);
5692 extern __inline void
5693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5694 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5696 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5697 (__v2df) __A,
5698 (__mmask8) __U);
5701 extern __inline __m256
5702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5703 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5705 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5706 (__v8sf) __W,
5707 (__mmask8) __U);
5710 extern __inline __m256
5711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5714 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5715 (__v8sf)
5716 _mm256_setzero_ps (),
5717 (__mmask8) __U);
5720 extern __inline void
5721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5724 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5725 (__v8sf) __A,
5726 (__mmask8) __U);
5729 extern __inline __m128
5730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5731 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5733 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5734 (__v4sf) __W,
5735 (__mmask8) __U);
5738 extern __inline __m128
5739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5740 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5742 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5743 (__v4sf)
5744 _mm_setzero_ps (),
5745 (__mmask8) __U);
5748 extern __inline void
5749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5750 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5752 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5753 (__v4sf) __A,
5754 (__mmask8) __U);
5757 extern __inline __m256i
5758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5759 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5761 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5762 (__v4di) __W,
5763 (__mmask8) __U);
5766 extern __inline __m256i
5767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5768 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5770 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5771 (__v4di)
5772 _mm256_setzero_si256 (),
5773 (__mmask8) __U);
5776 extern __inline void
5777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5778 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5780 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5781 (__v4di) __A,
5782 (__mmask8) __U);
5785 extern __inline __m128i
5786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5787 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5789 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5790 (__v2di) __W,
5791 (__mmask8) __U);
5794 extern __inline __m128i
5795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5796 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5798 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5799 (__v2di)
5800 _mm_setzero_di (),
5801 (__mmask8) __U);
5804 extern __inline void
5805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5806 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5808 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5809 (__v2di) __A,
5810 (__mmask8) __U);
5813 extern __inline __m256i
5814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5815 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5817 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5818 (__v8si) __W,
5819 (__mmask8) __U);
5822 extern __inline __m256i
5823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5824 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5826 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5827 (__v8si)
5828 _mm256_setzero_si256 (),
5829 (__mmask8) __U);
5832 extern __inline void
5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5836 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5837 (__v8si) __A,
5838 (__mmask8) __U);
5841 extern __inline __m128i
5842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5843 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5845 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5846 (__v4si) __W,
5847 (__mmask8) __U);
5850 extern __inline __m128i
5851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5852 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5854 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5855 (__v4si)
5856 _mm_setzero_si128 (),
5857 (__mmask8) __U);
5860 extern __inline void
5861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5862 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5864 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5865 (__v4si) __A,
5866 (__mmask8) __U);
5869 extern __inline __m256d
5870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5871 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5873 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5874 (__v4df) __W,
5875 (__mmask8) __U);
5878 extern __inline __m256d
5879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5880 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5882 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5883 (__v4df)
5884 _mm256_setzero_pd (),
5885 (__mmask8) __U);
5888 extern __inline __m256d
5889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5890 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5892 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
5893 (__v4df) __W,
5894 (__mmask8)
5895 __U);
5898 extern __inline __m256d
5899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5900 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
5902 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
5903 (__v4df)
5904 _mm256_setzero_pd (),
5905 (__mmask8)
5906 __U);
5909 extern __inline __m128d
5910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5911 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
5913 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
5914 (__v2df) __W,
5915 (__mmask8) __U);
5918 extern __inline __m128d
5919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5920 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
5922 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
5923 (__v2df)
5924 _mm_setzero_pd (),
5925 (__mmask8) __U);
5928 extern __inline __m128d
5929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5932 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
5933 (__v2df) __W,
5934 (__mmask8)
5935 __U);
5938 extern __inline __m128d
5939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
5942 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
5943 (__v2df)
5944 _mm_setzero_pd (),
5945 (__mmask8)
5946 __U);
5949 extern __inline __m256
5950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5951 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
5953 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
5954 (__v8sf) __W,
5955 (__mmask8) __U);
5958 extern __inline __m256
5959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5960 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
5962 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
5963 (__v8sf)
5964 _mm256_setzero_ps (),
5965 (__mmask8) __U);
5968 extern __inline __m256
5969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5972 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
5973 (__v8sf) __W,
5974 (__mmask8) __U);
5977 extern __inline __m256
5978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5979 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
5981 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
5982 (__v8sf)
5983 _mm256_setzero_ps (),
5984 (__mmask8)
5985 __U);
5988 extern __inline __m128
5989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5990 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
5992 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
5993 (__v4sf) __W,
5994 (__mmask8) __U);
5997 extern __inline __m128
5998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6001 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6002 (__v4sf)
6003 _mm_setzero_ps (),
6004 (__mmask8) __U);
6007 extern __inline __m128
6008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6009 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6011 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6012 (__v4sf) __W,
6013 (__mmask8) __U);
6016 extern __inline __m128
6017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6018 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6020 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6021 (__v4sf)
6022 _mm_setzero_ps (),
6023 (__mmask8)
6024 __U);
6027 extern __inline __m256i
6028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6029 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6031 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6032 (__v4di) __W,
6033 (__mmask8) __U);
6036 extern __inline __m256i
6037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6038 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6040 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6041 (__v4di)
6042 _mm256_setzero_si256 (),
6043 (__mmask8) __U);
6046 extern __inline __m256i
6047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6048 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6049 void const *__P)
6051 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6052 (__v4di) __W,
6053 (__mmask8)
6054 __U);
6057 extern __inline __m256i
6058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6061 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6062 (__v4di)
6063 _mm256_setzero_si256 (),
6064 (__mmask8)
6065 __U);
6068 extern __inline __m128i
6069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6070 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6072 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6073 (__v2di) __W,
6074 (__mmask8) __U);
6077 extern __inline __m128i
6078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6079 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6081 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6082 (__v2di)
6083 _mm_setzero_si128 (),
6084 (__mmask8) __U);
6087 extern __inline __m128i
6088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6091 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6092 (__v2di) __W,
6093 (__mmask8)
6094 __U);
6097 extern __inline __m128i
6098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6099 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6101 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6102 (__v2di)
6103 _mm_setzero_si128 (),
6104 (__mmask8)
6105 __U);
6108 extern __inline __m256i
6109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6110 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6112 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6113 (__v8si) __W,
6114 (__mmask8) __U);
6117 extern __inline __m256i
6118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6119 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6121 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6122 (__v8si)
6123 _mm256_setzero_si256 (),
6124 (__mmask8) __U);
6127 extern __inline __m256i
6128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6129 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6130 void const *__P)
6132 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6133 (__v8si) __W,
6134 (__mmask8)
6135 __U);
6138 extern __inline __m256i
6139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6140 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6142 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6143 (__v8si)
6144 _mm256_setzero_si256 (),
6145 (__mmask8)
6146 __U);
6149 extern __inline __m128i
6150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6151 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6153 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6154 (__v4si) __W,
6155 (__mmask8) __U);
6158 extern __inline __m128i
6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6162 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6163 (__v4si)
6164 _mm_setzero_si128 (),
6165 (__mmask8) __U);
6168 extern __inline __m128i
6169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6170 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6172 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6173 (__v4si) __W,
6174 (__mmask8)
6175 __U);
6178 extern __inline __m128i
6179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6180 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6182 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6183 (__v4si)
6184 _mm_setzero_si128 (),
6185 (__mmask8)
6186 __U);
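/* Illustrative usage sketch; not part of the original header.  Assumes
   <immintrin.h> and an avx512vl target.  Expand is the inverse of compress:
   consecutive low source elements (or memory elements, for expandloadu) are
   scattered to the lanes selected by the mask; the _maskz_ variants zero the
   remaining lanes.

     __m256d packed = _mm256_set_pd (0.0, 0.0, 3.0, 1.0); // lanes {1,3,0,0}
     __m256d r = _mm256_maskz_expand_pd (0x5, packed);     // {1.0, 0.0, 3.0, 0.0}

     double mem[2] = { 1.0, 3.0 };
     __m256d l = _mm256_maskz_expandloadu_pd (0x5, mem);   // {1.0, 0.0, 3.0, 0.0}
*/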
6189 extern __inline __m256d
6190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6191 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6193 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6194 /* idx */ ,
6195 (__v4df) __A,
6196 (__v4df) __B,
6197 (__mmask8) -1);
6201 extern __inline __m256d
6202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6204 __m256d __B)
6206 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6207 /* idx */ ,
6208 (__v4df) __A,
6209 (__v4df) __B,
6210 (__mmask8)
6211 __U);
6214 extern __inline __m256d
6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6217 __m256d __B)
6219 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6220 (__v4di) __I
6221 /* idx */ ,
6222 (__v4df) __B,
6223 (__mmask8)
6224 __U);
6227 extern __inline __m256d
6228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6229 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6230 __m256d __B)
6232 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6233 /* idx */ ,
6234 (__v4df) __A,
6235 (__v4df) __B,
6236 (__mmask8)
6237 __U);
6240 extern __inline __m256
6241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6242 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6244 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6245 /* idx */ ,
6246 (__v8sf) __A,
6247 (__v8sf) __B,
6248 (__mmask8) -1);
6251 extern __inline __m256
6252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6254 __m256 __B)
6256 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6257 /* idx */ ,
6258 (__v8sf) __A,
6259 (__v8sf) __B,
6260 (__mmask8) __U);
6263 extern __inline __m256
6264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6265 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6266 __m256 __B)
6268 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6269 (__v8si) __I
6270 /* idx */ ,
6271 (__v8sf) __B,
6272 (__mmask8) __U);
6275 extern __inline __m256
6276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6277 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6278 __m256 __B)
6280 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6281 /* idx */ ,
6282 (__v8sf) __A,
6283 (__v8sf) __B,
6284 (__mmask8)
6285 __U);
6288 extern __inline __m128i
6289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6292 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6293 /* idx */ ,
6294 (__v2di) __A,
6295 (__v2di) __B,
6296 (__mmask8) -1);
6299 extern __inline __m128i
6300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6302 __m128i __B)
6304 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6305 /* idx */ ,
6306 (__v2di) __A,
6307 (__v2di) __B,
6308 (__mmask8) __U);
6311 extern __inline __m128i
6312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6313 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6314 __m128i __B)
6316 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6317 (__v2di) __I
6318 /* idx */ ,
6319 (__v2di) __B,
6320 (__mmask8) __U);
6323 extern __inline __m128i
6324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6326 __m128i __B)
6328 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6329 /* idx */ ,
6330 (__v2di) __A,
6331 (__v2di) __B,
6332 (__mmask8)
6333 __U);
6336 extern __inline __m128i
6337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6338 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6340 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6341 /* idx */ ,
6342 (__v4si) __A,
6343 (__v4si) __B,
6344 (__mmask8) -1);
6347 extern __inline __m128i
6348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6350 __m128i __B)
6352 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6353 /* idx */ ,
6354 (__v4si) __A,
6355 (__v4si) __B,
6356 (__mmask8) __U);
6359 extern __inline __m128i
6360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6361 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6362 __m128i __B)
6364 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6365 (__v4si) __I
6366 /* idx */ ,
6367 (__v4si) __B,
6368 (__mmask8) __U);
6371 extern __inline __m128i
6372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6373 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6374 __m128i __B)
6376 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6377 /* idx */ ,
6378 (__v4si) __A,
6379 (__v4si) __B,
6380 (__mmask8)
6381 __U);
6384 extern __inline __m256i
6385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6388 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6389 /* idx */ ,
6390 (__v4di) __A,
6391 (__v4di) __B,
6392 (__mmask8) -1);
6395 extern __inline __m256i
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6398 __m256i __B)
6400 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6401 /* idx */ ,
6402 (__v4di) __A,
6403 (__v4di) __B,
6404 (__mmask8) __U);
6407 extern __inline __m256i
6408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6409 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6410 __mmask8 __U, __m256i __B)
6412 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6413 (__v4di) __I
6414 /* idx */ ,
6415 (__v4di) __B,
6416 (__mmask8) __U);
6419 extern __inline __m256i
6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6422 __m256i __I, __m256i __B)
6424 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6425 /* idx */ ,
6426 (__v4di) __A,
6427 (__v4di) __B,
6428 (__mmask8)
6429 __U);
6432 extern __inline __m256i
6433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6436 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6437 /* idx */ ,
6438 (__v8si) __A,
6439 (__v8si) __B,
6440 (__mmask8) -1);
6443 extern __inline __m256i
6444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6446 __m256i __B)
6448 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6449 /* idx */ ,
6450 (__v8si) __A,
6451 (__v8si) __B,
6452 (__mmask8) __U);
6455 extern __inline __m256i
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6458 __mmask8 __U, __m256i __B)
6460 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6461 (__v8si) __I
6462 /* idx */ ,
6463 (__v8si) __B,
6464 (__mmask8) __U);
6467 extern __inline __m256i
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6470 __m256i __I, __m256i __B)
6472 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6473 /* idx */ ,
6474 (__v8si) __A,
6475 (__v8si) __B,
6476 (__mmask8)
6477 __U);
6480 extern __inline __m128d
6481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6484 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6485 /* idx */ ,
6486 (__v2df) __A,
6487 (__v2df) __B,
6488 (__mmask8) -1);
6492 extern __inline __m128d
6493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6494 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6495 __m128d __B)
6497 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6498 /* idx */ ,
6499 (__v2df) __A,
6500 (__v2df) __B,
6501 (__mmask8)
6502 __U);
6505 extern __inline __m128d
6506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6507 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6508 __m128d __B)
6510 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6511 (__v2di) __I
6512 /* idx */ ,
6513 (__v2df) __B,
6514 (__mmask8)
6515 __U);
6518 extern __inline __m128d
6519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6520 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6521 __m128d __B)
6523 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6524 /* idx */ ,
6525 (__v2df) __A,
6526 (__v2df) __B,
6527 (__mmask8)
6528 __U);
6531 extern __inline __m128
6532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6533 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6535 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6536 /* idx */ ,
6537 (__v4sf) __A,
6538 (__v4sf) __B,
6539 (__mmask8) -1);
6542 extern __inline __m128
6543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6544 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6545 __m128 __B)
6547 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6548 /* idx */ ,
6549 (__v4sf) __A,
6550 (__v4sf) __B,
6551 (__mmask8) __U);
6554 extern __inline __m128
6555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6557 __m128 __B)
6559 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6560 (__v4si) __I
6561 /* idx */ ,
6562 (__v4sf) __B,
6563 (__mmask8) __U);
6566 extern __inline __m128
6567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6568 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6569 __m128 __B)
6571 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6572 /* idx */ ,
6573 (__v4sf) __A,
6574 (__v4sf) __B,
6575 (__mmask8)
6576 __U);
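/* Illustrative usage sketch; not part of the original header.  Assumes
   <immintrin.h> and an avx512vl target.  permutex2var selects each result
   lane from the concatenation of the two data operands: for the 4-element
   forms, index values 0..3 pick from the first operand and 4..7 from the
   second.  The _mask_ form keeps the first data operand's lane where the
   mask bit is clear, while _mask2_ keeps the bits of the index operand
   instead.

     __m256d a   = _mm256_set_pd (3.0, 2.0, 1.0, 0.0);  // a[i] == i
     __m256d b   = _mm256_set_pd (7.0, 6.0, 5.0, 4.0);  // b[i] == i + 4
     __m256i idx = _mm256_set_epi64x (4, 0, 5, 1);
     __m256d r   = _mm256_permutex2var_pd (a, idx, b);  // {1.0, 5.0, 0.0, 4.0}
*/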
6579 extern __inline __m128i
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6583 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6584 (__v2di) __Y,
6585 (__v2di)
6586 _mm_setzero_di (),
6587 (__mmask8) -1);
6590 extern __inline __m128i
6591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6592 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6593 __m128i __Y)
6595 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6596 (__v2di) __Y,
6597 (__v2di) __W,
6598 (__mmask8) __U);
6601 extern __inline __m128i
6602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6603 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6605 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6606 (__v2di) __Y,
6607 (__v2di)
6608 _mm_setzero_di (),
6609 (__mmask8) __U);
6612 extern __inline __m256i
6613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6614 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6615 __m256i __Y)
6617 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6618 (__v8si) __Y,
6619 (__v8si) __W,
6620 (__mmask8) __U);
6623 extern __inline __m256i
6624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6625 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6627 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6628 (__v8si) __Y,
6629 (__v8si)
6630 _mm256_setzero_si256 (),
6631 (__mmask8) __U);
6634 extern __inline __m128i
6635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6636 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6637 __m128i __Y)
6639 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6640 (__v4si) __Y,
6641 (__v4si) __W,
6642 (__mmask8) __U);
6645 extern __inline __m128i
6646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6647 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6649 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6650 (__v4si) __Y,
6651 (__v4si)
6652 _mm_setzero_si128 (),
6653 (__mmask8) __U);
6656 extern __inline __m256i
6657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6658 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6659 __m256i __Y)
6661 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6662 (__v4di) __Y,
6663 (__v4di) __W,
6664 (__mmask8) __U);
6667 extern __inline __m256i
6668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6669 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6671 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6672 (__v4di) __Y,
6673 (__v4di)
6674 _mm256_setzero_si256 (),
6675 (__mmask8) __U);
6678 extern __inline __m128i
6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6681 __m128i __Y)
6683 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6684 (__v2di) __Y,
6685 (__v2di) __W,
6686 (__mmask8) __U);
6689 extern __inline __m128i
6690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6691 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6693 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6694 (__v2di) __Y,
6695 (__v2di)
6696 _mm_setzero_di (),
6697 (__mmask8) __U);
6700 extern __inline __m256i
6701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6702 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6703 __m256i __Y)
6705 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6706 (__v8si) __Y,
6707 (__v8si) __W,
6708 (__mmask8) __U);
6711 extern __inline __m256i
6712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6715 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6716 (__v8si) __Y,
6717 (__v8si)
6718 _mm256_setzero_si256 (),
6719 (__mmask8) __U);
6722 extern __inline __m128i
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6725 __m128i __Y)
6727 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6728 (__v4si) __Y,
6729 (__v4si) __W,
6730 (__mmask8) __U);
6733 extern __inline __m128i
6734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6737 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6738 (__v4si) __Y,
6739 (__v4si)
6740 _mm_setzero_si128 (),
6741 (__mmask8) __U);
6744 extern __inline __m256i
6745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6747 __m256i __Y)
6749 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6750 (__v8si) __Y,
6751 (__v8si) __W,
6752 (__mmask8) __U);
6755 extern __inline __m256i
6756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6759 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6760 (__v8si) __Y,
6761 (__v8si)
6762 _mm256_setzero_si256 (),
6763 (__mmask8) __U);
6766 extern __inline __m128i
6767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6769 __m128i __Y)
6771 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6772 (__v4si) __Y,
6773 (__v4si) __W,
6774 (__mmask8) __U);
6777 extern __inline __m128i
6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6781 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6782 (__v4si) __Y,
6783 (__v4si)
6784 _mm_setzero_si128 (),
6785 (__mmask8) __U);
6788 extern __inline __m256i
6789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6791 __m256i __Y)
6793 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6794 (__v4di) __Y,
6795 (__v4di) __W,
6796 (__mmask8) __U);
6799 extern __inline __m256i
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6803 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6804 (__v4di) __Y,
6805 (__v4di)
6806 _mm256_setzero_si256 (),
6807 (__mmask8) __U);
6810 extern __inline __m128i
6811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6813 __m128i __Y)
6815 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6816 (__v2di) __Y,
6817 (__v2di) __W,
6818 (__mmask8) __U);
6821 extern __inline __m128i
6822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6825 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6826 (__v2di) __Y,
6827 (__v2di)
6828 _mm_setzero_di (),
6829 (__mmask8) __U);
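/* Illustrative usage sketch; not part of the original header.  Assumes
   <immintrin.h> and an avx512vl target.  The masked sllv/srav/srlv forms
   shift each lane by its own per-lane count and then either merge the
   unselected lanes from __W (_mask_) or zero them (_maskz_).

     __m256i x = _mm256_set1_epi32 (-16);
     __m256i n = _mm256_set1_epi32 (2);
     __m256i r = _mm256_maskz_srav_epi32 (0x0f, x, n);
     // lanes 0-3 == -4 (arithmetic shift keeps the sign), lanes 4-7 == 0
*/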
6832 extern __inline __m256i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
6836 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6837 (__v8si) __B,
6838 (__v8si)
6839 _mm256_setzero_si256 (),
6840 (__mmask8) -1);
6843 extern __inline __m256i
6844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6846 __m256i __B)
6848 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6849 (__v8si) __B,
6850 (__v8si) __W,
6851 (__mmask8) __U);
6854 extern __inline __m256i
6855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6858 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6859 (__v8si) __B,
6860 (__v8si)
6861 _mm256_setzero_si256 (),
6862 (__mmask8) __U);
6865 extern __inline __m128i
6866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 _mm_rolv_epi32 (__m128i __A, __m128i __B)
6869 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6870 (__v4si) __B,
6871 (__v4si)
6872 _mm_setzero_si128 (),
6873 (__mmask8) -1);
6876 extern __inline __m128i
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6879 __m128i __B)
6881 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6882 (__v4si) __B,
6883 (__v4si) __W,
6884 (__mmask8) __U);
6887 extern __inline __m128i
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6891 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6892 (__v4si) __B,
6893 (__v4si)
6894 _mm_setzero_si128 (),
6895 (__mmask8) __U);
6898 extern __inline __m256i
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
6902 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
6903 (__v8si) __B,
6904 (__v8si)
6905 _mm256_setzero_si256 (),
6906 (__mmask8) -1);
6909 extern __inline __m256i
6910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6912 __m256i __B)
6914 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
6915 (__v8si) __B,
6916 (__v8si) __W,
6917 (__mmask8) __U);
6920 extern __inline __m256i
6921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6924 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
6925 (__v8si) __B,
6926 (__v8si)
6927 _mm256_setzero_si256 (),
6928 (__mmask8) __U);
6931 extern __inline __m128i
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm_rorv_epi32 (__m128i __A, __m128i __B)
6935 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
6936 (__v4si) __B,
6937 (__v4si)
6938 _mm_setzero_si128 (),
6939 (__mmask8) -1);
6942 extern __inline __m128i
6943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6945 __m128i __B)
6947 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
6948 (__v4si) __B,
6949 (__v4si) __W,
6950 (__mmask8) __U);
6953 extern __inline __m128i
6954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6957 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
6958 (__v4si) __B,
6959 (__v4si)
6960 _mm_setzero_si128 (),
6961 (__mmask8) __U);
6964 extern __inline __m256i
6965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
6968 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
6969 (__v4di) __B,
6970 (__v4di)
6971 _mm256_setzero_si256 (),
6972 (__mmask8) -1);
6975 extern __inline __m256i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
6978 __m256i __B)
6980 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
6981 (__v4di) __B,
6982 (__v4di) __W,
6983 (__mmask8) __U);
6986 extern __inline __m256i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
6990 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
6991 (__v4di) __B,
6992 (__v4di)
6993 _mm256_setzero_si256 (),
6994 (__mmask8) __U);
6997 extern __inline __m128i
6998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7001 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7002 (__v2di) __B,
7003 (__v2di)
7004 _mm_setzero_di (),
7005 (__mmask8) -1);
7008 extern __inline __m128i
7009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7011 __m128i __B)
7013 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7014 (__v2di) __B,
7015 (__v2di) __W,
7016 (__mmask8) __U);
7019 extern __inline __m128i
7020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7023 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7024 (__v2di) __B,
7025 (__v2di)
7026 _mm_setzero_di (),
7027 (__mmask8) __U);
7030 extern __inline __m256i
7031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7034 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7035 (__v4di) __B,
7036 (__v4di)
7037 _mm256_setzero_si256 (),
7038 (__mmask8) -1);
7041 extern __inline __m256i
7042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7044 __m256i __B)
7046 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7047 (__v4di) __B,
7048 (__v4di) __W,
7049 (__mmask8) __U);
7052 extern __inline __m256i
7053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7056 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7057 (__v4di) __B,
7058 (__v4di)
7059 _mm256_setzero_si256 (),
7060 (__mmask8) __U);
7063 extern __inline __m128i
7064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7067 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7068 (__v2di) __B,
7069 (__v2di)
7070 _mm_setzero_di (),
7071 (__mmask8) -1);
7074 extern __inline __m128i
7075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7077 __m128i __B)
7079 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7080 (__v2di) __B,
7081 (__v2di) __W,
7082 (__mmask8) __U);
7085 extern __inline __m128i
7086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7089 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7090 (__v2di) __B,
7091 (__v2di)
7092 _mm_setzero_di (),
7093 (__mmask8) __U);
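/* Illustrative usage sketch; not part of the original header.  Assumes
   <immintrin.h> and an avx512vl target.  rolv/rorv rotate each lane left or
   right by its per-lane count, so no bits are discarded.

     __m128i v  = _mm_set1_epi32 (0x80000001);
     __m128i c  = _mm_set1_epi32 (1);
     __m128i rl = _mm_rolv_epi32 (v, c);   // 0x00000003 in every lane
     __m128i rr = _mm_rorv_epi32 (v, c);   // 0xC0000000 in every lane
*/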
7096 extern __inline __m256i
7097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7100 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7101 (__v4di) __Y,
7102 (__v4di)
7103 _mm256_setzero_si256 (),
7104 (__mmask8) -1);
7107 extern __inline __m256i
7108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7110 __m256i __Y)
7112 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7113 (__v4di) __Y,
7114 (__v4di) __W,
7115 (__mmask8) __U);
7118 extern __inline __m256i
7119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7122 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7123 (__v4di) __Y,
7124 (__v4di)
7125 _mm256_setzero_si256 (),
7126 (__mmask8) __U);
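/* Note; not part of the original header.  A 64-bit arithmetic right shift is
   new with AVX-512: AVX2 only provides vpsravd, so the 128/256-bit
   _mm*_srav_epi64 intrinsics above have no pre-AVX-512VL counterpart.
   Sketch, assuming <immintrin.h> and an avx512vl target:

     __m256i x = _mm256_set1_epi64x (-32);
     __m256i n = _mm256_set1_epi64x (3);
     __m256i r = _mm256_srav_epi64 (x, n);   // -4 in every lane
*/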
7129 extern __inline __m256i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7132 __m256i __B)
7134 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7135 (__v4di) __B,
7136 (__v4di) __W, __U);
7139 extern __inline __m256i
7140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7141 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7143 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7144 (__v4di) __B,
7145 (__v4di)
7146 _mm256_setzero_si256 (),
7147 __U);
7150 extern __inline __m128i
7151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7152 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7153 __m128i __B)
7155 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7156 (__v2di) __B,
7157 (__v2di) __W, __U);
7160 extern __inline __m128i
7161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7162 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7164 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7165 (__v2di) __B,
7166 (__v2di)
7167 _mm_setzero_si128 (),
7168 __U);
7171 extern __inline __m256i
7172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7173 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7174 __m256i __B)
7176 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7177 (__v4di) __B,
7178 (__v4di) __W, __U);
7181 extern __inline __m256i
7182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7183 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7185 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7186 (__v4di) __B,
7187 (__v4di)
7188 _mm256_setzero_si256 (),
7189 __U);
7192 extern __inline __m128i
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7195 __m128i __B)
7197 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7198 (__v2di) __B,
7199 (__v2di) __W, __U);
7202 extern __inline __m128i
7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7206 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7207 (__v2di) __B,
7208 (__v2di)
7209 _mm_setzero_si128 (),
7210 __U);
7213 extern __inline __m256i
7214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7215 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7216 __m256i __B)
7218 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7219 (__v4di) __B,
7220 (__v4di) __W,
7221 (__mmask8) __U);
7224 extern __inline __m256i
7225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7226 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7228 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7229 (__v4di) __B,
7230 (__v4di)
7231 _mm256_setzero_si256 (),
7232 (__mmask8) __U);
7235 extern __inline __m128i
7236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7237 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7239 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7240 (__v2di) __B,
7241 (__v2di) __W,
7242 (__mmask8) __U);
7245 extern __inline __m128i
7246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7247 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7249 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7250 (__v2di) __B,
7251 (__v2di)
7252 _mm_setzero_si128 (),
7253 (__mmask8) __U);
7256 extern __inline __m256i
7257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7258 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7259 __m256i __B)
7261 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7262 (__v4di) __B,
7263 (__v4di) __W,
7264 (__mmask8) __U);
7267 extern __inline __m256i
7268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7269 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7271 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7272 (__v4di) __B,
7273 (__v4di)
7274 _mm256_setzero_si256 (),
7275 (__mmask8) __U);
7278 extern __inline __m128i
7279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7281 __m128i __B)
7283 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7284 (__v2di) __B,
7285 (__v2di) __W,
7286 (__mmask8) __U);
7289 extern __inline __m128i
7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7293 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7294 (__v2di) __B,
7295 (__v2di)
7296 _mm_setzero_si128 (),
7297 (__mmask8) __U);
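/* Illustrative usage sketch; not part of the original header.  Assumes
   <immintrin.h> and an avx512vl target.  The masked bitwise intrinsics
   compute the operation lane-wise and merge the unselected lanes from __W
   (or zero them in the _maskz_ forms).

     __m256i a = _mm256_set1_epi64x (0x0ff0);
     __m256i b = _mm256_set1_epi64x (0x00ff);
     __m256i w = _mm256_set1_epi64x (-1);
     __m256i r = _mm256_mask_and_epi64 (w, 0x3, a, b);
     // lanes 0-1 == 0x00f0 (a & b), lanes 2-3 keep __W == -1
*/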
7300 extern __inline __m256d
7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7303 __m256d __B)
7305 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7306 (__v4df) __B,
7307 (__v4df) __W,
7308 (__mmask8) __U);
7311 extern __inline __m256d
7312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7315 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7316 (__v4df) __B,
7317 (__v4df)
7318 _mm256_setzero_pd (),
7319 (__mmask8) __U);
7322 extern __inline __m256
7323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7324 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7326 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7327 (__v8sf) __B,
7328 (__v8sf) __W,
7329 (__mmask8) __U);
7332 extern __inline __m256
7333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7334 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7336 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7337 (__v8sf) __B,
7338 (__v8sf)
7339 _mm256_setzero_ps (),
7340 (__mmask8) __U);
7343 extern __inline __m128
7344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7347 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7348 (__v4sf) __B,
7349 (__v4sf) __W,
7350 (__mmask8) __U);
7353 extern __inline __m128
7354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7355 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7357 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7358 (__v4sf) __B,
7359 (__v4sf)
7360 _mm_setzero_ps (),
7361 (__mmask8) __U);
7364 extern __inline __m128d
7365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7366 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7368 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7369 (__v2df) __B,
7370 (__v2df) __W,
7371 (__mmask8) __U);
7374 extern __inline __m128d
7375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7376 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7378 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7379 (__v2df) __B,
7380 (__v2df)
7381 _mm_setzero_pd (),
7382 (__mmask8) __U);
7385 extern __inline __m256d
7386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7387 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7388 __m256d __B)
7390 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7391 (__v4df) __B,
7392 (__v4df) __W,
7393 (__mmask8) __U);
7396 extern __inline __m256d
7397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7398 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7399 __m256d __B)
7401 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7402 (__v4df) __B,
7403 (__v4df) __W,
7404 (__mmask8) __U);
7407 extern __inline __m256d
7408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7409 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7411 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7412 (__v4df) __B,
7413 (__v4df)
7414 _mm256_setzero_pd (),
7415 (__mmask8) __U);
7418 extern __inline __m256
7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7422 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7423 (__v8sf) __B,
7424 (__v8sf) __W,
7425 (__mmask8) __U);
7428 extern __inline __m256d
7429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7432 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7433 (__v4df) __B,
7434 (__v4df)
7435 _mm256_setzero_pd (),
7436 (__mmask8) __U);
7439 extern __inline __m256
7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7443 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7444 (__v8sf) __B,
7445 (__v8sf) __W,
7446 (__mmask8) __U);
7449 extern __inline __m256
7450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7451 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7453 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7454 (__v8sf) __B,
7455 (__v8sf)
7456 _mm256_setzero_ps (),
7457 (__mmask8) __U);
7460 extern __inline __m256
7461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7462 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7464 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7465 (__v8sf) __B,
7466 (__v8sf)
7467 _mm256_setzero_ps (),
7468 (__mmask8) __U);
7471 extern __inline __m128
7472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7473 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7475 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7476 (__v4sf) __B,
7477 (__v4sf) __W,
7478 (__mmask8) __U);
7481 extern __inline __m128
7482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7483 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7485 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7486 (__v4sf) __B,
7487 (__v4sf) __W,
7488 (__mmask8) __U);
7491 extern __inline __m128
7492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7493 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7495 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7496 (__v4sf) __B,
7497 (__v4sf)
7498 _mm_setzero_ps (),
7499 (__mmask8) __U);
7502 extern __inline __m128
7503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7504 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7506 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7507 (__v4sf) __B,
7508 (__v4sf)
7509 _mm_setzero_ps (),
7510 (__mmask8) __U);
7513 extern __inline __m128
7514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7515 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7517 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7518 (__v4sf) __B,
7519 (__v4sf) __W,
7520 (__mmask8) __U);
7523 extern __inline __m128
7524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7525 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7527 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7528 (__v4sf) __B,
7529 (__v4sf)
7530 _mm_setzero_ps (),
7531 (__mmask8) __U);
7534 extern __inline __m128d
7535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7536 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7538 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7539 (__v2df) __B,
7540 (__v2df) __W,
7541 (__mmask8) __U);
7544 extern __inline __m128d
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7548 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7549 (__v2df) __B,
7550 (__v2df)
7551 _mm_setzero_pd (),
7552 (__mmask8) __U);
7555 extern __inline __m128d
7556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7557 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7559 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7560 (__v2df) __B,
7561 (__v2df) __W,
7562 (__mmask8) __U);
7565 extern __inline __m128d
7566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7567 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7569 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7570 (__v2df) __B,
7571 (__v2df)
7572 _mm_setzero_pd (),
7573 (__mmask8) __U);
7576 extern __inline __m128d
7577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7578 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7580 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7581 (__v2df) __B,
7582 (__v2df) __W,
7583 (__mmask8) __U);
7586 extern __inline __m128d
7587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7588 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7590 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7591 (__v2df) __B,
7592 (__v2df)
7593 _mm_setzero_pd (),
7594 (__mmask8) __U);
7597 extern __inline __m256
7598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7601 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7602 (__v8sf) __B,
7603 (__v8sf) __W,
7604 (__mmask8) __U);
7607 extern __inline __m256
7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7609 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7611 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7612 (__v8sf) __B,
7613 (__v8sf)
7614 _mm256_setzero_ps (),
7615 (__mmask8) __U);
7618 extern __inline __m256d
7619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7620 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7621 __m256d __B)
7623 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7624 (__v4df) __B,
7625 (__v4df) __W,
7626 (__mmask8) __U);
7629 extern __inline __m256d
7630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7631 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7633 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7634 (__v4df) __B,
7635 (__v4df)
7636 _mm256_setzero_pd (),
7637 (__mmask8) __U);
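/* Illustrative usage sketch; not part of the original header.  Assumes
   <immintrin.h> and an avx512vl target.  The masked max/min/div/mul forms
   operate only on the selected lanes; _maskz_ zeroes the rest and _mask_
   copies them from __W.

     __m128 a = _mm_set_ps (8.0f, 6.0f, 4.0f, 2.0f);  // lanes {2,4,6,8} low to high
     __m128 b = _mm_set1_ps (2.0f);
     __m128 q = _mm_maskz_div_ps (0x3, a, b);          // {1.0f, 2.0f, 0.0f, 0.0f}
*/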
7640 extern __inline __m256i
7641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7642 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7644 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7645 (__v4di) __B,
7646 (__v4di)
7647 _mm256_setzero_si256 (),
7648 __M);
7651 extern __inline __m256i
7652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7653 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7654 __m256i __B)
7656 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7657 (__v4di) __B,
7658 (__v4di) __W, __M);
7661 extern __inline __m256i
7662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7663 _mm256_min_epi64 (__m256i __A, __m256i __B)
7665 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7666 (__v4di) __B,
7667 (__v4di)
7668 _mm256_setzero_si256 (),
7669 (__mmask8) -1);
7672 extern __inline __m256i
7673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7675 __m256i __B)
7677 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7678 (__v4di) __B,
7679 (__v4di) __W, __M);
7682 extern __inline __m256i
7683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7686 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7687 (__v4di) __B,
7688 (__v4di)
7689 _mm256_setzero_si256 (),
7690 __M);
7693 extern __inline __m256i
7694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7695 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7697 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7698 (__v4di) __B,
7699 (__v4di)
7700 _mm256_setzero_si256 (),
7701 __M);
7704 extern __inline __m256i
7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7706 _mm256_max_epi64 (__m256i __A, __m256i __B)
7708 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7709 (__v4di) __B,
7710 (__v4di)
7711 _mm256_setzero_si256 (),
7712 (__mmask8) -1);
7715 extern __inline __m256i
7716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7717 _mm256_max_epu64 (__m256i __A, __m256i __B)
7719 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7720 (__v4di) __B,
7721 (__v4di)
7722 _mm256_setzero_si256 (),
7723 (__mmask8) -1);
7726 extern __inline __m256i
7727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7728 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7729 __m256i __B)
7731 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7732 (__v4di) __B,
7733 (__v4di) __W, __M);
7736 extern __inline __m256i
7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7738 _mm256_min_epu64 (__m256i __A, __m256i __B)
7740 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7741 (__v4di) __B,
7742 (__v4di)
7743 _mm256_setzero_si256 (),
7744 (__mmask8) -1);
7747 extern __inline __m256i
7748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7749 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7750 __m256i __B)
7752 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7753 (__v4di) __B,
7754 (__v4di) __W, __M);
7757 extern __inline __m256i
7758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7761 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7762 (__v4di) __B,
7763 (__v4di)
7764 _mm256_setzero_si256 (),
7765 __M);
7768 extern __inline __m256i
7769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7770 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7772 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7773 (__v8si) __B,
7774 (__v8si)
7775 _mm256_setzero_si256 (),
7776 __M);
7779 extern __inline __m256i
7780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7781 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7782 __m256i __B)
7784 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7785 (__v8si) __B,
7786 (__v8si) __W, __M);
7789 extern __inline __m256i
7790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7793 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7794 (__v8si) __B,
7795 (__v8si)
7796 _mm256_setzero_si256 (),
7797 __M);
7800 extern __inline __m256i
7801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7802 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7803 __m256i __B)
7805 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7806 (__v8si) __B,
7807 (__v8si) __W, __M);
7810 extern __inline __m256i
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7814 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7815 (__v8si) __B,
7816 (__v8si)
7817 _mm256_setzero_si256 (),
7818 __M);
7821 extern __inline __m256i
7822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7823 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7824 __m256i __B)
7826 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7827 (__v8si) __B,
7828 (__v8si) __W, __M);
7831 extern __inline __m256i
7832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7833 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7835 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7836 (__v8si) __B,
7837 (__v8si)
7838 _mm256_setzero_si256 (),
7839 __M);
7842 extern __inline __m256i
7843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7844 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7845 __m256i __B)
7847 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7848 (__v8si) __B,
7849 (__v8si) __W, __M);
7852 extern __inline __m128i
7853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7854 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7856 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7857 (__v2di) __B,
7858 (__v2di)
7859 _mm_setzero_si128 (),
7860 __M);
7863 extern __inline __m128i
7864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7865 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7866 __m128i __B)
7868 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7869 (__v2di) __B,
7870 (__v2di) __W, __M);
7873 extern __inline __m128i
7874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7875 _mm_min_epi64 (__m128i __A, __m128i __B)
7877 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7878 (__v2di) __B,
7879 (__v2di)
7880 _mm_setzero_di (),
7881 (__mmask8) -1);
7884 extern __inline __m128i
7885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7886 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7887 __m128i __B)
7889 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7890 (__v2di) __B,
7891 (__v2di) __W, __M);
7894 extern __inline __m128i
7895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7896 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7898 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7899 (__v2di) __B,
7900 (__v2di)
7901 _mm_setzero_si128 (),
7902 __M);
7905 extern __inline __m128i
7906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7907 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
7909 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
7910 (__v2di) __B,
7911 (__v2di)
7912 _mm_setzero_si128 (),
7913 __M);
7916 extern __inline __m128i
7917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7918 _mm_max_epi64 (__m128i __A, __m128i __B)
7920 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7921 (__v2di) __B,
7922 (__v2di)
7923 _mm_setzero_di (),
7924 (__mmask8) -1);
7927 extern __inline __m128i
7928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7929 _mm_max_epu64 (__m128i __A, __m128i __B)
7931 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
7932 (__v2di) __B,
7933 (__v2di)
7934 _mm_setzero_di (),
7935 (__mmask8) -1);
7938 extern __inline __m128i
7939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7940 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
7941 __m128i __B)
7943 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
7944 (__v2di) __B,
7945 (__v2di) __W, __M);
7948 extern __inline __m128i
7949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7950 _mm_min_epu64 (__m128i __A, __m128i __B)
7952 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
7953 (__v2di) __B,
7954 (__v2di)
7955 _mm_setzero_di (),
7956 (__mmask8) -1);
7959 extern __inline __m128i
7960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7961 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
7962 __m128i __B)
7964 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
7965 (__v2di) __B,
7966 (__v2di) __W, __M);
7969 extern __inline __m128i
7970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
7973 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
7974 (__v2di) __B,
7975 (__v2di)
7976 _mm_setzero_si128 (),
7977 __M);
7980 extern __inline __m128i
7981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7982 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
7984 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
7985 (__v4si) __B,
7986 (__v4si)
7987 _mm_setzero_si128 (),
7988 __M);
7991 extern __inline __m128i
7992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7993 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
7994 __m128i __B)
7996 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
7997 (__v4si) __B,
7998 (__v4si) __W, __M);
8001 extern __inline __m128i
8002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8003 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8005 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8006 (__v4si) __B,
8007 (__v4si)
8008 _mm_setzero_si128 (),
8009 __M);
8012 extern __inline __m128i
8013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8014 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8015 __m128i __B)
8017 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8018 (__v4si) __B,
8019 (__v4si) __W, __M);
8022 extern __inline __m128i
8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8026 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8027 (__v4si) __B,
8028 (__v4si)
8029 _mm_setzero_si128 (),
8030 __M);
8033 extern __inline __m128i
8034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8035 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8036 __m128i __B)
8038 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8039 (__v4si) __B,
8040 (__v4si) __W, __M);
8043 extern __inline __m128i
8044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8045 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8047 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8048 (__v4si) __B,
8049 (__v4si)
8050 _mm_setzero_si128 (),
8051 __M);
8054 extern __inline __m128i
8055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8056 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8057 __m128i __B)
8059 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8060 (__v4si) __B,
8061 (__v4si) __W, __M);
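/* Usage note (illustrative only, not part of the API): throughout this
   header the _mask_ variants merge into __W wherever the corresponding
   mask bit is clear, while the _maskz_ variants write zero there.  A
   minimal sketch using the max forms above:

     const int a[4] = { 3, -7, 5, -2 };
     __m128i v   = _mm_loadu_si128 ((const __m128i *) a);
     __m128i old = _mm_set1_epi32 (-1);
     __m128i m1  = _mm_mask_max_epi32 (old, 0x5, v, _mm_setzero_si128 ());
     __m128i m2  = _mm_maskz_max_epi32 (0x5, v, _mm_setzero_si128 ());

   With mask 0x5 (elements 0 and 2 selected), m1 is { 3, -1, 5, -1 } and
   m2 is { 3, 0, 5, 0 }, element 0 listed first.  */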
8064 #ifndef __AVX512CD__
8065 #pragma GCC push_options
8066 #pragma GCC target("avx512vl,avx512cd")
8067 #define __DISABLE_AVX512VLCD__
8068 #endif
8070 extern __inline __m128i
8071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8072 _mm_broadcastmb_epi64 (__mmask8 __A)
8074 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8077 extern __inline __m256i
8078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8079 _mm256_broadcastmb_epi64 (__mmask8 __A)
8081 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8084 extern __inline __m128i
8085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8086 _mm_broadcastmw_epi32 (__mmask16 __A)
8088 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8091 extern __inline __m256i
8092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8093 _mm256_broadcastmw_epi32 (__mmask16 __A)
8095 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8098 extern __inline __m256i
8099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8100 _mm256_lzcnt_epi32 (__m256i __A)
8102 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8103 (__v8si)
8104 _mm256_setzero_si256 (),
8105 (__mmask8) -1);
8108 extern __inline __m256i
8109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8110 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8112 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8113 (__v8si) __W,
8114 (__mmask8) __U);
8117 extern __inline __m256i
8118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8119 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8121 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8122 (__v8si)
8123 _mm256_setzero_si256 (),
8124 (__mmask8) __U);
8127 extern __inline __m256i
8128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8129 _mm256_lzcnt_epi64 (__m256i __A)
8131 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8132 (__v4di)
8133 _mm256_setzero_si256 (),
8134 (__mmask8) -1);
8137 extern __inline __m256i
8138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8141 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8142 (__v4di) __W,
8143 (__mmask8) __U);
8146 extern __inline __m256i
8147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8148 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8150 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8151 (__v4di)
8152 _mm256_setzero_si256 (),
8153 (__mmask8) __U);
8156 extern __inline __m256i
8157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8158 _mm256_conflict_epi64 (__m256i __A)
8160 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8161 (__v4di)
8162 _mm256_setzero_si256 (),
8163 (__mmask8) -1);
8167 extern __inline __m256i
8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8169 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8171 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8172 (__v4di) __W,
8173 (__mmask8)
8174 __U);
8177 extern __inline __m256i
8178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8181 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8182 (__v4di)
8183 _mm256_setzero_si256 (),
8184 (__mmask8)
8185 __U);
8188 extern __inline __m256i
8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190 _mm256_conflict_epi32 (__m256i __A)
8192 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8193 (__v8si)
8194 _mm256_setzero_si256 (),
8195 (__mmask8) -1);
8199 extern __inline __m256i
8200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8203 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8204 (__v8si) __W,
8205 (__mmask8)
8206 __U);
8209 extern __inline __m256i
8210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8211 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8213 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8214 (__v8si)
8215 _mm256_setzero_si256 (),
8216 (__mmask8)
8217 __U);
8220 extern __inline __m128i
8221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8222 _mm_lzcnt_epi32 (__m128i __A)
8224 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8225 (__v4si)
8226 _mm_setzero_si128 (),
8227 (__mmask8) -1);
8230 extern __inline __m128i
8231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8232 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8234 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8235 (__v4si) __W,
8236 (__mmask8) __U);
8239 extern __inline __m128i
8240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8241 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8243 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8244 (__v4si)
8245 _mm_setzero_si128 (),
8246 (__mmask8) __U);
8249 extern __inline __m128i
8250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8251 _mm_lzcnt_epi64 (__m128i __A)
8253 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8254 (__v2di)
8255 _mm_setzero_di (),
8256 (__mmask8) -1);
8259 extern __inline __m128i
8260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8261 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8263 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8264 (__v2di) __W,
8265 (__mmask8) __U);
8268 extern __inline __m128i
8269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8270 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8272 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8273 (__v2di)
8274 _mm_setzero_di (),
8275 (__mmask8) __U);
8278 extern __inline __m128i
8279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8280 _mm_conflict_epi64 (__m128i __A)
8282 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8283 (__v2di)
8284 _mm_setzero_di (),
8285 (__mmask8) -1);
8289 extern __inline __m128i
8290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8291 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8293 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8294 (__v2di) __W,
8295 (__mmask8)
8296 __U);
8299 extern __inline __m128i
8300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8301 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8303 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8304 (__v2di)
8305 _mm_setzero_di (),
8306 (__mmask8)
8307 __U);
8310 extern __inline __m128i
8311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8312 _mm_conflict_epi32 (__m128i __A)
8314 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8315 (__v4si)
8316 _mm_setzero_si128 (),
8317 (__mmask8) -1);
8321 extern __inline __m128i
8322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8325 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8326 (__v4si) __W,
8327 (__mmask8)
8328 __U);
8331 extern __inline __m128i
8332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8333 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8335 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8336 (__v4si)
8337 _mm_setzero_si128 (),
8338 (__mmask8)
8339 __U);
8342 #ifdef __DISABLE_AVX512VLCD__
8343 #pragma GCC pop_options
8344 #endif
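/* Illustrative sketch for the AVX512CD forms above (values chosen for
   the example only): _mm256_lzcnt_epi32 counts leading zero bits per
   32-bit element, and _mm256_conflict_epi32 sets, for each element, one
   bit for every earlier element holding the same value.

     __m256i v  = _mm256_set_epi32 (8, 4, 2, 1, 8, 4, 2, 1);
     __m256i lz = _mm256_lzcnt_epi32 (v);
     __m256i cf = _mm256_conflict_epi32 (v);

   Here lz is { 31, 30, 29, 28, 31, 30, 29, 28 } and cf is
   { 0, 0, 0, 0, 1, 2, 4, 8 }, element 0 listed first.  */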
8346 extern __inline __m256d
8347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8348 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8349 __m256d __B)
8351 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8352 (__v4df) __B,
8353 (__v4df) __W,
8354 (__mmask8) __U);
8357 extern __inline __m256d
8358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8359 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8361 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8362 (__v4df) __B,
8363 (__v4df)
8364 _mm256_setzero_pd (),
8365 (__mmask8) __U);
8368 extern __inline __m128d
8369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8370 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8371 __m128d __B)
8373 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8374 (__v2df) __B,
8375 (__v2df) __W,
8376 (__mmask8) __U);
8379 extern __inline __m128d
8380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8381 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8383 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8384 (__v2df) __B,
8385 (__v2df)
8386 _mm_setzero_pd (),
8387 (__mmask8) __U);
8390 extern __inline __m256
8391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8392 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8393 __m256 __B)
8395 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8396 (__v8sf) __B,
8397 (__v8sf) __W,
8398 (__mmask8) __U);
8401 extern __inline __m256d
8402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8403 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8404 __m256d __B)
8406 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8407 (__v4df) __B,
8408 (__v4df) __W,
8409 (__mmask8) __U);
8412 extern __inline __m256d
8413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8414 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8416 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8417 (__v4df) __B,
8418 (__v4df)
8419 _mm256_setzero_pd (),
8420 (__mmask8) __U);
8423 extern __inline __m128d
8424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8425 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8426 __m128d __B)
8428 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8429 (__v2df) __B,
8430 (__v2df) __W,
8431 (__mmask8) __U);
8434 extern __inline __m128d
8435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8436 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8438 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8439 (__v2df) __B,
8440 (__v2df)
8441 _mm_setzero_pd (),
8442 (__mmask8) __U);
8445 extern __inline __m256
8446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8447 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8448 __m256 __B)
8450 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8451 (__v8sf) __B,
8452 (__v8sf) __W,
8453 (__mmask8) __U);
8456 extern __inline __m256
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8460 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8461 (__v8sf) __B,
8462 (__v8sf)
8463 _mm256_setzero_ps (),
8464 (__mmask8) __U);
8467 extern __inline __m128
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8471 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8472 (__v4sf) __B,
8473 (__v4sf) __W,
8474 (__mmask8) __U);
8477 extern __inline __m128
8478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8479 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8481 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8482 (__v4sf) __B,
8483 (__v4sf)
8484 _mm_setzero_ps (),
8485 (__mmask8) __U);
8488 extern __inline __m128
8489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8490 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8492 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8493 (__v4sf) __W,
8494 (__mmask8) __U);
8497 extern __inline __m128
8498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8499 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8501 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8502 (__v4sf)
8503 _mm_setzero_ps (),
8504 (__mmask8) __U);
8507 extern __inline __m256
8508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8509 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8511 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8512 (__v8sf) __B,
8513 (__v8sf)
8514 _mm256_setzero_ps (),
8515 (__mmask8) __U);
8518 extern __inline __m256
8519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8520 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8522 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8523 (__v8sf) __W,
8524 (__mmask8) __U);
8527 extern __inline __m256
8528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8529 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8531 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8532 (__v8sf)
8533 _mm256_setzero_ps (),
8534 (__mmask8) __U);
8537 extern __inline __m128
8538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8539 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8541 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8542 (__v4sf) __B,
8543 (__v4sf) __W,
8544 (__mmask8) __U);
8547 extern __inline __m128
8548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8549 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8551 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8552 (__v4sf) __B,
8553 (__v4sf)
8554 _mm_setzero_ps (),
8555 (__mmask8) __U);
8558 extern __inline __m256i
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8561 __m128i __B)
8563 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8564 (__v4si) __B,
8565 (__v8si) __W,
8566 (__mmask8) __U);
8569 extern __inline __m256i
8570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8571 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8573 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8574 (__v4si) __B,
8575 (__v8si)
8576 _mm256_setzero_si256 (),
8577 (__mmask8) __U);
8580 extern __inline __m128i
8581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8582 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8583 __m128i __B)
8585 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8586 (__v4si) __B,
8587 (__v4si) __W,
8588 (__mmask8) __U);
8591 extern __inline __m128i
8592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8593 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8595 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8596 (__v4si) __B,
8597 (__v4si)
8598 _mm_setzero_si128 (),
8599 (__mmask8) __U);
8602 extern __inline __m256i
8603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8604 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8606 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8607 (__v2di) __B,
8608 (__v4di)
8609 _mm256_setzero_si256 (),
8610 (__mmask8) -1);
8613 extern __inline __m256i
8614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8615 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8616 __m128i __B)
8618 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8619 (__v2di) __B,
8620 (__v4di) __W,
8621 (__mmask8) __U);
8624 extern __inline __m256i
8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8626 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8628 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8629 (__v2di) __B,
8630 (__v4di)
8631 _mm256_setzero_si256 (),
8632 (__mmask8) __U);
8635 extern __inline __m128i
8636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8637 _mm_sra_epi64 (__m128i __A, __m128i __B)
8639 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8640 (__v2di) __B,
8641 (__v2di)
8642 _mm_setzero_di (),
8643 (__mmask8) -1);
8646 extern __inline __m128i
8647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8648 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8649 __m128i __B)
8651 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8652 (__v2di) __B,
8653 (__v2di) __W,
8654 (__mmask8) __U);
8657 extern __inline __m128i
8658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8661 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8662 (__v2di) __B,
8663 (__v2di)
8664 _mm_setzero_di (),
8665 (__mmask8) __U);
8668 extern __inline __m128i
8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8671 __m128i __B)
8673 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8674 (__v4si) __B,
8675 (__v4si) __W,
8676 (__mmask8) __U);
8679 extern __inline __m128i
8680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8683 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8684 (__v4si) __B,
8685 (__v4si)
8686 _mm_setzero_si128 (),
8687 (__mmask8) __U);
8690 extern __inline __m128i
8691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8693 __m128i __B)
8695 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8696 (__v2di) __B,
8697 (__v2di) __W,
8698 (__mmask8) __U);
8701 extern __inline __m128i
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8705 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8706 (__v2di) __B,
8707 (__v2di)
8708 _mm_setzero_di (),
8709 (__mmask8) __U);
8712 extern __inline __m256i
8713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8715 __m128i __B)
8717 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8718 (__v4si) __B,
8719 (__v8si) __W,
8720 (__mmask8) __U);
8723 extern __inline __m256i
8724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8725 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8727 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8728 (__v4si) __B,
8729 (__v8si)
8730 _mm256_setzero_si256 (),
8731 (__mmask8) __U);
8734 extern __inline __m256i
8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8737 __m128i __B)
8739 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8740 (__v2di) __B,
8741 (__v4di) __W,
8742 (__mmask8) __U);
8745 extern __inline __m256i
8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8749 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8750 (__v2di) __B,
8751 (__v4di)
8752 _mm256_setzero_si256 (),
8753 (__mmask8) __U);
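/* Note (informal): in the sll/sra forms above the shift count is taken
   from the low 64 bits of the __m128i count operand, as in the older
   _mm_sll_epi32/_mm_sra_epi32 family; only the 64-bit arithmetic shifts
   have no pre-AVX-512 counterpart.  A minimal sketch, assuming `v' is
   some existing __m256i value:

     __m128i cnt = _mm_cvtsi32_si128 (3);
     __m256i r   = _mm256_maskz_sll_epi32 (0xF0, v, cnt);

   Only the upper four dwords of v are shifted left by three bits; the
   lower four result elements are zeroed.  */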
8756 extern __inline __m256
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8759 __m256 __Y)
8761 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8762 (__v8si) __X,
8763 (__v8sf) __W,
8764 (__mmask8) __U);
8767 extern __inline __m256
8768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8769 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8771 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8772 (__v8si) __X,
8773 (__v8sf)
8774 _mm256_setzero_ps (),
8775 (__mmask8) __U);
8778 extern __inline __m256d
8779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8782 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8783 (__v4di) __X,
8784 (__v4df)
8785 _mm256_setzero_pd (),
8786 (__mmask8) -1);
8789 extern __inline __m256d
8790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8791 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8792 __m256d __Y)
8794 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8795 (__v4di) __X,
8796 (__v4df) __W,
8797 (__mmask8) __U);
8800 extern __inline __m256d
8801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8804 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8805 (__v4di) __X,
8806 (__v4df)
8807 _mm256_setzero_pd (),
8808 (__mmask8) __U);
8811 extern __inline __m256d
8812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8813 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8814 __m256i __C)
8816 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8817 (__v4di) __C,
8818 (__v4df) __W,
8819 (__mmask8)
8820 __U);
8823 extern __inline __m256d
8824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8825 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8827 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8828 (__v4di) __C,
8829 (__v4df)
8830 _mm256_setzero_pd (),
8831 (__mmask8)
8832 __U);
8835 extern __inline __m256
8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8838 __m256i __C)
8840 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8841 (__v8si) __C,
8842 (__v8sf) __W,
8843 (__mmask8) __U);
8846 extern __inline __m256
8847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8848 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8850 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8851 (__v8si) __C,
8852 (__v8sf)
8853 _mm256_setzero_ps (),
8854 (__mmask8) __U);
8857 extern __inline __m128d
8858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8859 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8860 __m128i __C)
8862 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8863 (__v2di) __C,
8864 (__v2df) __W,
8865 (__mmask8) __U);
8868 extern __inline __m128d
8869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8870 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8872 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8873 (__v2di) __C,
8874 (__v2df)
8875 _mm_setzero_pd (),
8876 (__mmask8) __U);
8879 extern __inline __m128
8880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8881 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8882 __m128i __C)
8884 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8885 (__v4si) __C,
8886 (__v4sf) __W,
8887 (__mmask8) __U);
8890 extern __inline __m128
8891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8894 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8895 (__v4si) __C,
8896 (__v4sf)
8897 _mm_setzero_ps (),
8898 (__mmask8) __U);
8901 extern __inline __m256i
8902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8903 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
8905 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
8906 (__v8si) __B,
8907 (__v8si)
8908 _mm256_setzero_si256 (),
8909 __M);
8912 extern __inline __m256i
8913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8914 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8916 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8917 (__v4di) __X,
8918 (__v4di)
8919 _mm256_setzero_si256 (),
8920 __M);
8923 extern __inline __m256i
8924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8925 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
8926 __m256i __B)
8928 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
8929 (__v8si) __B,
8930 (__v8si) __W, __M);
8933 extern __inline __m128i
8934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8935 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8937 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
8938 (__v4si) __B,
8939 (__v4si)
8940 _mm_setzero_si128 (),
8941 __M);
8944 extern __inline __m128i
8945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946 _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8947 __m128i __B)
8949 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
8950 (__v4si) __B,
8951 (__v4si) __W, __M);
8954 extern __inline __m256i
8955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8956 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
8957 __m256i __Y)
8959 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
8960 (__v8si) __Y,
8961 (__v4di) __W, __M);
8964 extern __inline __m256i
8965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8966 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
8968 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
8969 (__v8si) __Y,
8970 (__v4di)
8971 _mm256_setzero_si256 (),
8972 __M);
8975 extern __inline __m128i
8976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8977 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
8978 __m128i __Y)
8980 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
8981 (__v4si) __Y,
8982 (__v2di) __W, __M);
8985 extern __inline __m128i
8986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8987 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
8989 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
8990 (__v4si) __Y,
8991 (__v2di)
8992 _mm_setzero_si128 (),
8993 __M);
8996 extern __inline __m256i
8997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8998 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8999 __m256i __Y)
9001 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9002 (__v4di) __X,
9003 (__v4di) __W,
9004 __M);
9007 extern __inline __m256i
9008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9009 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9010 __m256i __Y)
9012 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9013 (__v8si) __Y,
9014 (__v4di) __W, __M);
9017 extern __inline __m256i
9018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9019 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9021 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9022 (__v8si) __X,
9023 (__v8si)
9024 _mm256_setzero_si256 (),
9025 __M);
9028 extern __inline __m256i
9029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9030 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9032 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9033 (__v8si) __Y,
9034 (__v4di)
9035 _mm256_setzero_si256 (),
9036 __M);
9039 extern __inline __m128i
9040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9041 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9042 __m128i __Y)
9044 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9045 (__v4si) __Y,
9046 (__v2di) __W, __M);
9049 extern __inline __m128i
9050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9051 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9053 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9054 (__v4si) __Y,
9055 (__v2di)
9056 _mm_setzero_si128 (),
9057 __M);
9060 extern __inline __m256i
9061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9062 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9063 __m256i __Y)
9065 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9066 (__v8si) __X,
9067 (__v8si) __W,
9068 __M);
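/* Informal reminder for the mul_epi32/mul_epu32 forms above: as with
   the unmasked AVX2 _mm256_mul_epi32, only the even-numbered 32-bit
   source elements are multiplied, giving 64-bit products, so __M holds
   one bit per 64-bit result element.  Sketch (v and w assumed to be
   __m256i values):

     __m256i p = _mm256_maskz_mul_epi32 (0x3, v, w);

   computes the two low 64-bit products and zeroes the two high ones.  */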
9071 #ifdef __OPTIMIZE__
9072 extern __inline __m256i
9073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9074 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9075 __m256i __X, const int __I)
9077 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9078 __I,
9079 (__v4di) __W,
9080 (__mmask8) __M);
9083 extern __inline __m256i
9084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9085 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9087 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9088 __I,
9089 (__v4di)
9090 _mm256_setzero_si256 (),
9091 (__mmask8) __M);
9094 extern __inline __m256d
9095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9096 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9097 __m256d __B, const int __imm)
9099 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9100 (__v4df) __B, __imm,
9101 (__v4df) __W,
9102 (__mmask8) __U);
9105 extern __inline __m256d
9106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9107 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9108 const int __imm)
9110 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9111 (__v4df) __B, __imm,
9112 (__v4df)
9113 _mm256_setzero_pd (),
9114 (__mmask8) __U);
9117 extern __inline __m128d
9118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9119 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9120 __m128d __B, const int __imm)
9122 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9123 (__v2df) __B, __imm,
9124 (__v2df) __W,
9125 (__mmask8) __U);
9128 extern __inline __m128d
9129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9130 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9131 const int __imm)
9133 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9134 (__v2df) __B, __imm,
9135 (__v2df)
9136 _mm_setzero_pd (),
9137 (__mmask8) __U);
9140 extern __inline __m256
9141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9142 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9143 __m256 __B, const int __imm)
9145 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9146 (__v8sf) __B, __imm,
9147 (__v8sf) __W,
9148 (__mmask8) __U);
9151 extern __inline __m256
9152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9153 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9154 const int __imm)
9156 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9157 (__v8sf) __B, __imm,
9158 (__v8sf)
9159 _mm256_setzero_ps (),
9160 (__mmask8) __U);
9163 extern __inline __m128
9164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9165 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9166 const int __imm)
9168 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9169 (__v4sf) __B, __imm,
9170 (__v4sf) __W,
9171 (__mmask8) __U);
9174 extern __inline __m128
9175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9176 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9177 const int __imm)
9179 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9180 (__v4sf) __B, __imm,
9181 (__v4sf)
9182 _mm_setzero_ps (),
9183 (__mmask8) __U);
9186 extern __inline __m256i
9187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9188 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9190 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9191 (__v4si) __B,
9192 __imm,
9193 (__v8si)
9194 _mm256_setzero_si256 (),
9195 (__mmask8) -1);
9199 extern __inline __m256i
9200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9201 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9202 __m128i __B, const int __imm)
9204 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9205 (__v4si) __B,
9206 __imm,
9207 (__v8si) __W,
9208 (__mmask8)
9209 __U);
9212 extern __inline __m256i
9213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9214 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9215 const int __imm)
9217 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9218 (__v4si) __B,
9219 __imm,
9220 (__v8si)
9221 _mm256_setzero_si256 (),
9222 (__mmask8)
9223 __U);
9226 extern __inline __m256
9227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9228 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9230 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9231 (__v4sf) __B,
9232 __imm,
9233 (__v8sf)
9234 _mm256_setzero_ps (),
9235 (__mmask8) -1);
9238 extern __inline __m256
9239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9240 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9241 __m128 __B, const int __imm)
9243 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9244 (__v4sf) __B,
9245 __imm,
9246 (__v8sf) __W,
9247 (__mmask8) __U);
9250 extern __inline __m256
9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9253 const int __imm)
9255 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9256 (__v4sf) __B,
9257 __imm,
9258 (__v8sf)
9259 _mm256_setzero_ps (),
9260 (__mmask8) __U);
9263 extern __inline __m128i
9264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9265 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9267 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9268 __imm,
9269 (__v4si)
9270 _mm_setzero_si128 (),
9271 (__mmask8) -1);
9275 extern __inline __m128i
9276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9277 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9278 const int __imm)
9280 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9281 __imm,
9282 (__v4si) __W,
9283 (__mmask8)
9284 __U);
9287 extern __inline __m128i
9288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9289 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9290 const int __imm)
9292 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9293 __imm,
9294 (__v4si)
9295 _mm_setzero_si128 (),
9296 (__mmask8)
9297 __U);
9300 extern __inline __m128
9301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9302 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
9304 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9305 __imm,
9306 (__v4sf)
9307 _mm_setzero_ps (),
9308 (__mmask8) -1);
9312 extern __inline __m128
9313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9314 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9315 const int __imm)
9317 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9318 __imm,
9319 (__v4sf) __W,
9320 (__mmask8)
9321 __U);
9324 extern __inline __m128
9325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9326 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9327 const int __imm)
9329 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9330 __imm,
9331 (__v4sf)
9332 _mm_setzero_ps (),
9333 (__mmask8)
9334 __U);
9337 extern __inline __m256i
9338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9341 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9342 (__v4di) __B,
9343 __imm,
9344 (__v4di)
9345 _mm256_setzero_si256 (),
9346 (__mmask8) -1);
9349 extern __inline __m256i
9350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9351 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9352 __m256i __B, const int __imm)
9354 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9355 (__v4di) __B,
9356 __imm,
9357 (__v4di) __W,
9358 (__mmask8) __U);
9361 extern __inline __m256i
9362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9363 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9364 const int __imm)
9366 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9367 (__v4di) __B,
9368 __imm,
9369 (__v4di)
9370 _mm256_setzero_si256 (),
9371 (__mmask8) __U);
9374 extern __inline __m256i
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9378 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9379 (__v8si) __B,
9380 __imm,
9381 (__v8si)
9382 _mm256_setzero_si256 (),
9383 (__mmask8) -1);
9386 extern __inline __m256i
9387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9389 __m256i __B, const int __imm)
9391 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9392 (__v8si) __B,
9393 __imm,
9394 (__v8si) __W,
9395 (__mmask8) __U);
9398 extern __inline __m256i
9399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9401 const int __imm)
9403 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9404 (__v8si) __B,
9405 __imm,
9406 (__v8si)
9407 _mm256_setzero_si256 (),
9408 (__mmask8) __U);
9411 extern __inline __m256d
9412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9415 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9416 (__v4df) __B,
9417 __imm,
9418 (__v4df)
9419 _mm256_setzero_pd (),
9420 (__mmask8) -1);
9423 extern __inline __m256d
9424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9426 __m256d __B, const int __imm)
9428 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9429 (__v4df) __B,
9430 __imm,
9431 (__v4df) __W,
9432 (__mmask8) __U);
9435 extern __inline __m256d
9436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9437 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9438 const int __imm)
9440 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9441 (__v4df) __B,
9442 __imm,
9443 (__v4df)
9444 _mm256_setzero_pd (),
9445 (__mmask8) __U);
9448 extern __inline __m256
9449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9452 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9453 (__v8sf) __B,
9454 __imm,
9455 (__v8sf)
9456 _mm256_setzero_ps (),
9457 (__mmask8) -1);
9460 extern __inline __m256
9461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9463 __m256 __B, const int __imm)
9465 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9466 (__v8sf) __B,
9467 __imm,
9468 (__v8sf) __W,
9469 (__mmask8) __U);
9472 extern __inline __m256
9473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9475 const int __imm)
9477 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9478 (__v8sf) __B,
9479 __imm,
9480 (__v8sf)
9481 _mm256_setzero_ps (),
9482 (__mmask8) __U);
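/* Note (informal): the shuffle_{i32x4,i64x2,f32x4,f64x2} intrinsics
   above move whole 128-bit lanes: at 256-bit width only bits 1:0 of
   __imm are used, bit 0 choosing which lane of __A becomes the low half
   of the result and bit 1 which lane of __B becomes the high half.
   For example, with a and b assumed to be __m256i values,

     __m256i r = _mm256_shuffle_i64x2 (a, b, 0x3);

   puts the high lane of a in the low half of r and the high lane of b
   in its high half.  */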
9485 extern __inline __m256d
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9488 const int __imm)
9490 return (__m256d) __builtin_ia32_fixupimmpd256 ((__v4df) __A,
9491 (__v4df) __B,
9492 (__v4di) __C, __imm);
9495 extern __inline __m256d
9496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9497 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9498 __m256i __C, const int __imm)
9500 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9501 (__v4df) __B,
9502 (__v4di) __C,
9503 __imm,
9504 (__mmask8) __U);
9507 extern __inline __m256d
9508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9509 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9510 __m256i __C, const int __imm)
9512 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9513 (__v4df) __B,
9514 (__v4di) __C,
9515 __imm,
9516 (__mmask8) __U);
9519 extern __inline __m256
9520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9521 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9522 const int __imm)
9524 return (__m256) __builtin_ia32_fixupimmps256 ((__v8sf) __A,
9525 (__v8sf) __B,
9526 (__v8si) __C, __imm);
9529 extern __inline __m256
9530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9531 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9532 __m256i __C, const int __imm)
9534 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9535 (__v8sf) __B,
9536 (__v8si) __C,
9537 __imm,
9538 (__mmask8) __U);
9541 extern __inline __m256
9542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9543 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9544 __m256i __C, const int __imm)
9546 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9547 (__v8sf) __B,
9548 (__v8si) __C,
9549 __imm,
9550 (__mmask8) __U);
9553 extern __inline __m128d
9554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9555 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9556 const int __imm)
9558 return (__m128d) __builtin_ia32_fixupimmpd128 ((__v2df) __A,
9559 (__v2df) __B,
9560 (__v2di) __C, __imm);
9563 extern __inline __m128d
9564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9565 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9566 __m128i __C, const int __imm)
9568 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9569 (__v2df) __B,
9570 (__v2di) __C,
9571 __imm,
9572 (__mmask8) __U);
9575 extern __inline __m128d
9576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9577 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9578 __m128i __C, const int __imm)
9580 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9581 (__v2df) __B,
9582 (__v2di) __C,
9583 __imm,
9584 (__mmask8) __U);
9587 extern __inline __m128
9588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9589 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9591 return (__m128) __builtin_ia32_fixupimmps128 ((__v4sf) __A,
9592 (__v4sf) __B,
9593 (__v4si) __C, __imm);
9596 extern __inline __m128
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9599 __m128i __C, const int __imm)
9601 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9602 (__v4sf) __B,
9603 (__v4si) __C,
9604 __imm,
9605 (__mmask8) __U);
9608 extern __inline __m128
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9611 __m128i __C, const int __imm)
9613 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9614 (__v4sf) __B,
9615 (__v4si) __C,
9616 __imm,
9617 (__mmask8) __U);
9620 extern __inline __m256i
9621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9623 const int __imm)
9625 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9626 (__v8si) __W,
9627 (__mmask8) __U);
9630 extern __inline __m256i
9631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9632 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9634 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9635 (__v8si)
9636 _mm256_setzero_si256 (),
9637 (__mmask8) __U);
9640 extern __inline __m128i
9641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9642 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9643 const int __imm)
9645 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9646 (__v4si) __W,
9647 (__mmask8) __U);
9650 extern __inline __m128i
9651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9652 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9654 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9655 (__v4si)
9656 _mm_setzero_si128 (),
9657 (__mmask8) __U);
9660 extern __inline __m256i
9661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9662 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9663 const int __imm)
9665 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9666 (__v4di) __W,
9667 (__mmask8) __U);
9670 extern __inline __m256i
9671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9672 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9674 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9675 (__v4di)
9676 _mm256_setzero_si256 (),
9677 (__mmask8) __U);
9680 extern __inline __m128i
9681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9682 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9683 const int __imm)
9685 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9686 (__v2di) __W,
9687 (__mmask8) __U);
9690 extern __inline __m128i
9691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9692 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9694 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9695 (__v2di)
9696 _mm_setzero_si128 (),
9697 (__mmask8) __U);
9700 extern __inline __m256i
9701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9702 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9703 const int __imm)
9705 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9706 (__v4di) __B,
9707 (__v4di) __C, __imm,
9708 (__mmask8) -1);
9711 extern __inline __m256i
9712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9713 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9714 __m256i __B, __m256i __C,
9715 const int __imm)
9717 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9718 (__v4di) __B,
9719 (__v4di) __C, __imm,
9720 (__mmask8) __U);
9723 extern __inline __m256i
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9726 __m256i __B, __m256i __C,
9727 const int __imm)
9729 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9730 (__v4di) __B,
9731 (__v4di) __C,
9732 __imm,
9733 (__mmask8) __U);
9736 extern __inline __m256i
9737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9738 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9739 const int __imm)
9741 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9742 (__v8si) __B,
9743 (__v8si) __C, imm,
9744 (__mmask8) -1);
9747 extern __inline __m256i
9748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9750 __m256i __B, __m256i __C,
9751 const int __imm)
9753 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9754 (__v8si) __B,
9755 (__v8si) __C, __imm,
9756 (__mmask8) __U);
9759 extern __inline __m256i
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9762 __m256i __B, __m256i __C,
9763 const int __imm)
9765 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9766 (__v8si) __B,
9767 (__v8si) __C,
9768 __imm,
9769 (__mmask8) __U);
9772 extern __inline __m128i
9773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9775 const int __imm)
9777 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9778 (__v2di) __B,
9779 (__v2di) __C, __imm,
9780 (__mmask8) -1);
9783 extern __inline __m128i
9784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9785 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9786 __m128i __B, __m128i __C, const int __imm)
9788 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9789 (__v2di) __B,
9790 (__v2di) __C, __imm,
9791 (__mmask8) __U);
9794 extern __inline __m128i
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9797 __m128i __B, __m128i __C, const int __imm)
9799 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9800 (__v2di) __B,
9801 (__v2di) __C,
9802 __imm,
9803 (__mmask8) __U);
9806 extern __inline __m128i
9807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9808 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9809 const int __imm)
9811 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9812 (__v4si) __B,
9813 (__v4si) __C, __imm,
9814 (__mmask8) -1);
9817 extern __inline __m128i
9818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9819 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9820 __m128i __B, __m128i __C, const int __imm)
9822 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9823 (__v4si) __B,
9824 (__v4si) __C, __imm,
9825 (__mmask8) __U);
9828 extern __inline __m128i
9829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9830 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9831 __m128i __B, __m128i __C, const int __imm)
9833 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9834 (__v4si) __B,
9835 (__v4si) __C,
9836 __imm,
9837 (__mmask8) __U);
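/* Usage sketch (illustrative, not part of the header): VPTERNLOG
   evaluates an arbitrary three-input boolean function; the immediate
   is the 8-bit truth table indexed by (__A_bit << 2) | (__B_bit << 1)
   | __C_bit, so 0x96 is XOR of all three inputs and 0xE8 is bitwise
   majority.  Assumes -mavx512vl; the helper names are hypothetical.  */
static __inline __m256i
example_xor3 (__m256i __a, __m256i __b, __m256i __c)
{
  return _mm256_ternarylogic_epi64 (__a, __b, __c, 0x96);
}

static __inline __m256i
example_majority (__m256i __a, __m256i __b, __m256i __c)
{
  return _mm256_ternarylogic_epi64 (__a, __b, __c, 0xE8);
}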
9840 extern __inline __m256
9841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9842 _mm256_roundscale_ps (__m256 __A, const int __imm)
9844 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9845 __imm,
9846 (__v8sf)
9847 _mm256_setzero_ps (),
9848 (__mmask8) -1);
9851 extern __inline __m256
9852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9853 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9854 const int __imm)
9856 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9857 __imm,
9858 (__v8sf) __W,
9859 (__mmask8) __U);
9862 extern __inline __m256
9863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9864 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9866 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9867 __imm,
9868 (__v8sf)
9869 _mm256_setzero_ps (),
9870 (__mmask8) __U);
9873 extern __inline __m256d
9874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9875 _mm256_roundscale_pd (__m256d __A, const int __imm)
9877 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9878 __imm,
9879 (__v4df)
9880 _mm256_setzero_pd (),
9881 (__mmask8) -1);
9884 extern __inline __m256d
9885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9886 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
9887 const int __imm)
9889 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9890 __imm,
9891 (__v4df) __W,
9892 (__mmask8) __U);
9895 extern __inline __m256d
9896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9897 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
9899 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9900 __imm,
9901 (__v4df)
9902 _mm256_setzero_pd (),
9903 (__mmask8) __U);
9906 extern __inline __m128
9907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908 _mm_roundscale_ps (__m128 __A, const int __imm)
9910 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
9911 __imm,
9912 (__v4sf)
9913 _mm_setzero_ps (),
9914 (__mmask8) -1);
9917 extern __inline __m128
9918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9919 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
9920 const int __imm)
9922 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
9923 __imm,
9924 (__v4sf) __W,
9925 (__mmask8) __U);
9928 extern __inline __m128
9929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9930 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
9932 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
9933 __imm,
9934 (__v4sf)
9935 _mm_setzero_ps (),
9936 (__mmask8) __U);
9939 extern __inline __m128d
9940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9941 _mm_roundscale_pd (__m128d __A, const int __imm)
9943 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
9944 __imm,
9945 (__v2df)
9946 _mm_setzero_pd (),
9947 (__mmask8) -1);
9950 extern __inline __m128d
9951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9952 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
9953 const int __imm)
9955 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
9956 __imm,
9957 (__v2df) __W,
9958 (__mmask8) __U);
9961 extern __inline __m128d
9962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9963 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
9965 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
9966 __imm,
9967 (__v2df)
9968 _mm_setzero_pd (),
9969 (__mmask8) __U);
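/* Usage sketch (illustrative, not part of the header): for VRNDSCALE
   the low immediate bits select the rounding mode (0x0 = nearest even,
   0x1 = down, 0x2 = up, 0x3 = truncate) and the upper four bits give
   the number of fraction bits to keep, so 0x01 rounds down to whole
   numbers.  Assumes -mavx512vl; the helper name is hypothetical.  */
static __inline __m256
example_floor_masked (__m256 __old, __mmask8 __k, __m256 __x)
{
  /* Floor the lanes selected by __k; the rest keep __old.  */
  return _mm256_mask_roundscale_ps (__old, __k, __x, 0x01);
}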
9972 extern __inline __m256
9973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9974 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
9975 _MM_MANTISSA_SIGN_ENUM __C)
9977 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
9978 (__C << 2) | __B,
9979 (__v8sf)
9980 _mm256_setzero_ps (),
9981 (__mmask8) -1);
9984 extern __inline __m256
9985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9986 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
9987 _MM_MANTISSA_NORM_ENUM __B,
9988 _MM_MANTISSA_SIGN_ENUM __C)
9990 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
9991 (__C << 2) | __B,
9992 (__v8sf) __W,
9993 (__mmask8) __U);
9996 extern __inline __m256
9997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9998 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
9999 _MM_MANTISSA_NORM_ENUM __B,
10000 _MM_MANTISSA_SIGN_ENUM __C)
10002 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10003 (__C << 2) | __B,
10004 (__v8sf)
10005 _mm256_setzero_ps (),
10006 (__mmask8) __U);
10009 extern __inline __m128
10010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10011 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10012 _MM_MANTISSA_SIGN_ENUM __C)
10014 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10015 (__C << 2) | __B,
10016 (__v4sf)
10017 _mm_setzero_ps (),
10018 (__mmask8) -1);
10021 extern __inline __m128
10022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10024 _MM_MANTISSA_NORM_ENUM __B,
10025 _MM_MANTISSA_SIGN_ENUM __C)
10027 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10028 (__C << 2) | __B,
10029 (__v4sf) __W,
10030 (__mmask8) __U);
10033 extern __inline __m128
10034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10035 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10036 _MM_MANTISSA_NORM_ENUM __B,
10037 _MM_MANTISSA_SIGN_ENUM __C)
10039 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10040 (__C << 2) | __B,
10041 (__v4sf)
10042 _mm_setzero_ps (),
10043 (__mmask8) __U);
10046 extern __inline __m256d
10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10049 _MM_MANTISSA_SIGN_ENUM __C)
10051 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10052 (__C << 2) | __B,
10053 (__v4df)
10054 _mm256_setzero_pd (),
10055 (__mmask8) -1);
10058 extern __inline __m256d
10059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10060 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10061 _MM_MANTISSA_NORM_ENUM __B,
10062 _MM_MANTISSA_SIGN_ENUM __C)
10064 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10065 (__C << 2) | __B,
10066 (__v4df) __W,
10067 (__mmask8) __U);
10070 extern __inline __m256d
10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10073 _MM_MANTISSA_NORM_ENUM __B,
10074 _MM_MANTISSA_SIGN_ENUM __C)
10076 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10077 (__C << 2) | __B,
10078 (__v4df)
10079 _mm256_setzero_pd (),
10080 (__mmask8) __U);
10083 extern __inline __m128d
10084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10085 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10086 _MM_MANTISSA_SIGN_ENUM __C)
10088 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10089 (__C << 2) | __B,
10090 (__v2df)
10091 _mm_setzero_pd (),
10092 (__mmask8) -1);
10095 extern __inline __m128d
10096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10097 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10098 _MM_MANTISSA_NORM_ENUM __B,
10099 _MM_MANTISSA_SIGN_ENUM __C)
10101 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10102 (__C << 2) | __B,
10103 (__v2df) __W,
10104 (__mmask8) __U);
10107 extern __inline __m128d
10108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10110 _MM_MANTISSA_NORM_ENUM __B,
10111 _MM_MANTISSA_SIGN_ENUM __C)
10113 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10114 (__C << 2) | __B,
10115 (__v2df)
10116 _mm_setzero_pd (),
10117 (__mmask8) __U);
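/* Usage sketch (illustrative, not part of the header): VGETMANT
   extracts the mantissa of each element, normalised into the interval
   chosen by the _MM_MANTISSA_NORM_ENUM argument, with the sign handled
   per the _MM_MANTISSA_SIGN_ENUM argument (both enums come from
   avx512fintrin.h).  Assumes -mavx512vl; the helper name is
   hypothetical.  */
static __inline __m256d
example_mantissa (__m256d __x)
{
  /* |mantissa| of each double scaled into [1, 2), sign forced to +.  */
  return _mm256_getmant_pd (__x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero);
}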
10120 extern __inline __m256
10121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10122 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10123 __m256i __index, float const *__addr,
10124 int __scale)
10126 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10127 __addr,
10128 (__v8si) __index,
10129 __mask, __scale);
10132 extern __inline __m128
10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10135 __m128i __index, float const *__addr,
10136 int __scale)
10138 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10139 __addr,
10140 (__v4si) __index,
10141 __mask, __scale);
10144 extern __inline __m256d
10145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10146 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10147 __m128i __index, double const *__addr,
10148 int __scale)
10150 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10151 __addr,
10152 (__v4si) __index,
10153 __mask, __scale);
10156 extern __inline __m128d
10157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10158 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10159 __m128i __index, double const *__addr,
10160 int __scale)
10162 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10163 __addr,
10164 (__v4si) __index,
10165 __mask, __scale);
10168 extern __inline __m128
10169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10170 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10171 __m256i __index, float const *__addr,
10172 int __scale)
10174 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10175 __addr,
10176 (__v4di) __index,
10177 __mask, __scale);
10180 extern __inline __m128
10181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10182 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10183 __m128i __index, float const *__addr,
10184 int __scale)
10186 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10187 __addr,
10188 (__v2di) __index,
10189 __mask, __scale);
10192 extern __inline __m256d
10193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10195 __m256i __index, double const *__addr,
10196 int __scale)
10198 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10199 __addr,
10200 (__v4di) __index,
10201 __mask, __scale);
10204 extern __inline __m128d
10205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10206 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10207 __m128i __index, double const *__addr,
10208 int __scale)
10210 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10211 __addr,
10212 (__v2di) __index,
10213 __mask, __scale);
10216 extern __inline __m256i
10217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10218 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10219 __m256i __index, int const *__addr,
10220 int __scale)
10222 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10223 __addr,
10224 (__v8si) __index,
10225 __mask, __scale);
10228 extern __inline __m128i
10229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10230 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10231 __m128i __index, int const *__addr,
10232 int __scale)
10234 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10235 __addr,
10236 (__v4si) __index,
10237 __mask, __scale);
10240 extern __inline __m256i
10241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10242 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10243 __m128i __index, long long const *__addr,
10244 int __scale)
10246 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10247 __addr,
10248 (__v4si) __index,
10249 __mask, __scale);
10252 extern __inline __m128i
10253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10254 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10255 __m128i __index, long long const *__addr,
10256 int __scale)
10258 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10259 __addr,
10260 (__v4si) __index,
10261 __mask, __scale);
10264 extern __inline __m128i
10265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10267 __m256i __index, int const *__addr,
10268 int __scale)
10270 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10271 __addr,
10272 (__v4di) __index,
10273 __mask, __scale);
10276 extern __inline __m128i
10277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10278 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10279 __m128i __index, int const *__addr,
10280 int __scale)
10282 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10283 __addr,
10284 (__v2di) __index,
10285 __mask, __scale);
10288 extern __inline __m256i
10289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10290 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10291 __m256i __index, long long const *__addr,
10292 int __scale)
10294 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10295 __addr,
10296 (__v4di) __index,
10297 __mask, __scale);
10300 extern __inline __m128i
10301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10302 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10303 __m128i __index, long long const *__addr,
10304 int __scale)
10306 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10307 __addr,
10308 (__v2di) __index,
10309 __mask, __scale);
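/* Usage sketch (illustrative, not part of the header): the masked
   gathers above load only the lanes whose mask bit is set and keep the
   old value elsewhere, which makes them convenient for tail handling.
   Assumes -mavx512vl; the helper name is hypothetical.  */
static __inline __m256d
example_gather_tail (const double *__tbl, __m128i __idx, __mmask8 __k,
		     __m256d __fallback)
{
  /* Load __tbl[idx[i]] where bit i of __k is set; scale 8 because the
     elements are 8-byte doubles.  */
  return _mm256_mmask_i32gather_pd (__fallback, __k, __idx, __tbl, 8);
}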
10312 extern __inline void
10313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10314 _mm256_i32scatter_ps (float *__addr, __m256i __index,
10315 __m256 __v1, const int __scale)
10317 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10318 (__v8si) __index, (__v8sf) __v1,
10319 __scale);
10322 extern __inline void
10323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10324 _mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10325 __m256i __index, __m256 __v1,
10326 const int __scale)
10328 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10329 (__v8sf) __v1, __scale);
10332 extern __inline void
10333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10334 _mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10335 const int __scale)
10337 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10338 (__v4si) __index, (__v4sf) __v1,
10339 __scale);
10342 extern __inline void
10343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10344 _mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10345 __m128i __index, __m128 __v1,
10346 const int __scale)
10348 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10349 (__v4sf) __v1, __scale);
10352 extern __inline void
10353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10354 _mm256_i32scatter_pd (double *__addr, __m128i __index,
10355 __m256d __v1, const int __scale)
10357 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10358 (__v4si) __index, (__v4df) __v1,
10359 __scale);
10362 extern __inline void
10363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10364 _mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10365 __m128i __index, __m256d __v1,
10366 const int __scale)
10368 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10369 (__v4df) __v1, __scale);
10372 extern __inline void
10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374 _mm_i32scatter_pd (double *__addr, __m128i __index,
10375 __m128d __v1, const int __scale)
10377 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10378 (__v4si) __index, (__v2df) __v1,
10379 __scale);
10382 extern __inline void
10383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10384 _mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10385 __m128i __index, __m128d __v1,
10386 const int __scale)
10388 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10389 (__v2df) __v1, __scale);
10392 extern __inline void
10393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10394 _mm256_i64scatter_ps (float *__addr, __m256i __index,
10395 __m128 __v1, const int __scale)
10397 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10398 (__v4di) __index, (__v4sf) __v1,
10399 __scale);
10402 extern __inline void
10403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10404 _mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10405 __m256i __index, __m128 __v1,
10406 const int __scale)
10408 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10409 (__v4sf) __v1, __scale);
10412 extern __inline void
10413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10414 _mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10415 const int __scale)
10417 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10418 (__v2di) __index, (__v4sf) __v1,
10419 __scale);
10422 extern __inline void
10423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10424 _mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10425 __m128i __index, __m128 __v1,
10426 const int __scale)
10428 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10429 (__v4sf) __v1, __scale);
10432 extern __inline void
10433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10434 _mm256_i64scatter_pd (double *__addr, __m256i __index,
10435 __m256d __v1, const int __scale)
10437 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10438 (__v4di) __index, (__v4df) __v1,
10439 __scale);
10442 extern __inline void
10443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10444 _mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10445 __m256i __index, __m256d __v1,
10446 const int __scale)
10448 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10449 (__v4df) __v1, __scale);
10452 extern __inline void
10453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10454 _mm_i64scatter_pd (double *__addr, __m128i __index,
10455 __m128d __v1, const int __scale)
10457 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10458 (__v2di) __index, (__v2df) __v1,
10459 __scale);
10462 extern __inline void
10463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10464 _mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10465 __m128i __index, __m128d __v1,
10466 const int __scale)
10468 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10469 (__v2df) __v1, __scale);
10472 extern __inline void
10473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10474 _mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10475 __m256i __v1, const int __scale)
10477 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10478 (__v8si) __index, (__v8si) __v1,
10479 __scale);
10482 extern __inline void
10483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10484 _mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10485 __m256i __index, __m256i __v1,
10486 const int __scale)
10488 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10489 (__v8si) __v1, __scale);
10492 extern __inline void
10493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10494 _mm_i32scatter_epi32 (int *__addr, __m128i __index,
10495 __m128i __v1, const int __scale)
10497 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10498 (__v4si) __index, (__v4si) __v1,
10499 __scale);
10502 extern __inline void
10503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10504 _mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10505 __m128i __index, __m128i __v1,
10506 const int __scale)
10508 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10509 (__v4si) __v1, __scale);
10512 extern __inline void
10513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10514 _mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10515 __m256i __v1, const int __scale)
10517 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10518 (__v4si) __index, (__v4di) __v1,
10519 __scale);
10522 extern __inline void
10523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10524 _mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10525 __m128i __index, __m256i __v1,
10526 const int __scale)
10528 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10529 (__v4di) __v1, __scale);
10532 extern __inline void
10533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10534 _mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10535 __m128i __v1, const int __scale)
10537 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10538 (__v4si) __index, (__v2di) __v1,
10539 __scale);
10542 extern __inline void
10543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10544 _mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10545 __m128i __index, __m128i __v1,
10546 const int __scale)
10548 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10549 (__v2di) __v1, __scale);
10552 extern __inline void
10553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10554 _mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10555 __m128i __v1, const int __scale)
10557 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10558 (__v4di) __index, (__v4si) __v1,
10559 __scale);
10562 extern __inline void
10563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10564 _mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10565 __m256i __index, __m128i __v1,
10566 const int __scale)
10568 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10569 (__v4si) __v1, __scale);
10572 extern __inline void
10573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10574 _mm_i64scatter_epi32 (int *__addr, __m128i __index,
10575 __m128i __v1, const int __scale)
10577 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10578 (__v2di) __index, (__v4si) __v1,
10579 __scale);
10582 extern __inline void
10583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10584 _mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10585 __m128i __index, __m128i __v1,
10586 const int __scale)
10588 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10589 (__v4si) __v1, __scale);
10592 extern __inline void
10593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10594 _mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10595 __m256i __v1, const int __scale)
10597 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10598 (__v4di) __index, (__v4di) __v1,
10599 __scale);
10602 extern __inline void
10603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10604 _mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10605 __m256i __index, __m256i __v1,
10606 const int __scale)
10608 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10609 (__v4di) __v1, __scale);
10612 extern __inline void
10613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10614 _mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10615 __m128i __v1, const int __scale)
10617 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10618 (__v2di) __index, (__v2di) __v1,
10619 __scale);
10622 extern __inline void
10623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10624 _mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10625 __m128i __index, __m128i __v1,
10626 const int __scale)
10628 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10629 (__v2di) __v1, __scale);
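/* Usage sketch (illustrative, not part of the header): the unmasked
   scatter wrappers above simply pass an all-ones mask (0xFF), while
   the masked forms store only the selected lanes.  Assumes -mavx512vl;
   the helper name is hypothetical.  */
static __inline void
example_scatter (float *__dst, __m256i __idx, __m256 __vals, __mmask8 __k)
{
  /* Store lane i to __dst[idx[i]] where bit i of __k is set; scale 4
     because the elements are 4-byte floats.  */
  _mm256_mask_i32scatter_ps (__dst, __k, __idx, __vals, 4);
}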
10632 extern __inline __m256i
10633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10634 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10635 _MM_PERM_ENUM __mask)
10637 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10638 (__v8si) __W,
10639 (__mmask8) __U);
10642 extern __inline __m256i
10643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10644 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10645 _MM_PERM_ENUM __mask)
10647 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10648 (__v8si)
10649 _mm256_setzero_si256 (),
10650 (__mmask8) __U);
10653 extern __inline __m128i
10654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10655 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10656 _MM_PERM_ENUM __mask)
10658 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10659 (__v4si) __W,
10660 (__mmask8) __U);
10663 extern __inline __m128i
10664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10665 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10666 _MM_PERM_ENUM __mask)
10668 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10669 (__v4si)
10670 _mm_setzero_si128 (),
10671 (__mmask8) __U);
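/* Usage sketch (illustrative, not part of the header): the shuffle
   control is an _MM_PERM_ENUM value (from avx512fintrin.h) naming the
   source element for each of the four positions within every 128-bit
   lane.  Assumes -mavx512vl; the helper name is hypothetical.  */
static __inline __m256i
example_broadcast_low (__m256i __old, __mmask8 __k, __m256i __x)
{
  /* Replicate element 0 of each 128-bit lane where __k is set.  */
  return _mm256_mask_shuffle_epi32 (__old, __k, __x, _MM_PERM_AAAA);
}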
10674 extern __inline __m256i
10675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10676 _mm256_rol_epi32 (__m256i __A, const int __B)
10678 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10679 (__v8si)
10680 _mm256_setzero_si256 (),
10681 (__mmask8) -1);
10684 extern __inline __m256i
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10687 const int __B)
10689 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10690 (__v8si) __W,
10691 (__mmask8) __U);
10694 extern __inline __m256i
10695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10696 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10698 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10699 (__v8si)
10700 _mm256_setzero_si256 (),
10701 (__mmask8) __U);
10704 extern __inline __m128i
10705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10706 _mm_rol_epi32 (__m128i __A, const int __B)
10708 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10709 (__v4si)
10710 _mm_setzero_si128 (),
10711 (__mmask8) -1);
10714 extern __inline __m128i
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10717 const int __B)
10719 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10720 (__v4si) __W,
10721 (__mmask8) __U);
10724 extern __inline __m128i
10725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10726 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10728 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10729 (__v4si)
10730 _mm_setzero_si128 (),
10731 (__mmask8) __U);
10734 extern __inline __m256i
10735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736 _mm256_ror_epi32 (__m256i __A, const int __B)
10738 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10739 (__v8si)
10740 _mm256_setzero_si256 (),
10741 (__mmask8) -1);
10744 extern __inline __m256i
10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10747 const int __B)
10749 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10750 (__v8si) __W,
10751 (__mmask8) __U);
10754 extern __inline __m256i
10755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10756 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10758 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10759 (__v8si)
10760 _mm256_setzero_si256 (),
10761 (__mmask8) __U);
10764 extern __inline __m128i
10765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10766 _mm_ror_epi32 (__m128i __A, const int __B)
10768 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10769 (__v4si)
10770 _mm_setzero_si128 (),
10771 (__mmask8) -1);
10774 extern __inline __m128i
10775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10777 const int __B)
10779 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10780 (__v4si) __W,
10781 (__mmask8) __U);
10784 extern __inline __m128i
10785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10786 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10788 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10789 (__v4si)
10790 _mm_setzero_si128 (),
10791 (__mmask8) __U);
10794 extern __inline __m256i
10795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10796 _mm256_rol_epi64 (__m256i __A, const int __B)
10798 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10799 (__v4di)
10800 _mm256_setzero_si256 (),
10801 (__mmask8) -1);
10804 extern __inline __m256i
10805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10807 const int __B)
10809 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10810 (__v4di) __W,
10811 (__mmask8) __U);
10814 extern __inline __m256i
10815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10816 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10818 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10819 (__v4di)
10820 _mm256_setzero_si256 (),
10821 (__mmask8) __U);
10824 extern __inline __m128i
10825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10826 _mm_rol_epi64 (__m128i __A, const int __B)
10828 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10829 (__v2di)
10830 _mm_setzero_di (),
10831 (__mmask8) -1);
10834 extern __inline __m128i
10835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10836 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10837 const int __B)
10839 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10840 (__v2di) __W,
10841 (__mmask8) __U);
10844 extern __inline __m128i
10845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10846 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10848 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10849 (__v2di)
10850 _mm_setzero_di (),
10851 (__mmask8) __U);
10854 extern __inline __m256i
10855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10856 _mm256_ror_epi64 (__m256i __A, const int __B)
10858 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10859 (__v4di)
10860 _mm256_setzero_si256 (),
10861 (__mmask8) -1);
10864 extern __inline __m256i
10865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10867 const int __B)
10869 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10870 (__v4di) __W,
10871 (__mmask8) __U);
10874 extern __inline __m256i
10875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10876 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
10878 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10879 (__v4di)
10880 _mm256_setzero_si256 (),
10881 (__mmask8) __U);
10884 extern __inline __m128i
10885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10886 _mm_ror_epi64 (__m128i __A, const int __B)
10888 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10889 (__v2di)
10890 _mm_setzero_di (),
10891 (__mmask8) -1);
10894 extern __inline __m128i
10895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10896 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10897 const int __B)
10899 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10900 (__v2di) __W,
10901 (__mmask8) __U);
10904 extern __inline __m128i
10905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10906 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
10908 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10909 (__v2di)
10910 _mm_setzero_di (),
10911 (__mmask8) __U);
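/* Usage sketch (illustrative, not part of the header): VPROLD/VPRORD
   provide a true per-lane rotate, replacing the shift/shift/or
   sequence needed before AVX-512.  Assumes -mavx512vl; the helper name
   is hypothetical.  */
static __inline __m256i
example_rotl7 (__m256i __x)
{
  /* Rotate every 32-bit lane left by 7 bits.  */
  return _mm256_rol_epi32 (__x, 7);
}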
10914 extern __inline __m128i
10915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10916 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
10918 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
10919 (__v4si) __B, __imm,
10920 (__v4si)
10921 _mm_setzero_si128 (),
10922 (__mmask8) -1);
10925 extern __inline __m128i
10926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10927 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10928 __m128i __B, const int __imm)
10930 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
10931 (__v4si) __B, __imm,
10932 (__v4si) __W,
10933 (__mmask8) __U);
10936 extern __inline __m128i
10937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10938 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
10939 const int __imm)
10941 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
10942 (__v4si) __B, __imm,
10943 (__v4si)
10944 _mm_setzero_si128 (),
10945 (__mmask8) __U);
10948 extern __inline __m128i
10949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10950 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
10952 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
10953 (__v2di) __B, __imm,
10954 (__v2di)
10955 _mm_setzero_di (),
10956 (__mmask8) -1);
10959 extern __inline __m128i
10960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10961 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10962 __m128i __B, const int __imm)
10964 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
10965 (__v2di) __B, __imm,
10966 (__v2di) __W,
10967 (__mmask8) __U);
10970 extern __inline __m128i
10971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
10973 const int __imm)
10975 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
10976 (__v2di) __B, __imm,
10977 (__v2di)
10978 _mm_setzero_di (),
10979 (__mmask8) __U);
10982 extern __inline __m256i
10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
10986 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
10987 (__v8si) __B, __imm,
10988 (__v8si)
10989 _mm256_setzero_si256 (),
10990 (__mmask8) -1);
10993 extern __inline __m256i
10994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10995 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10996 __m256i __B, const int __imm)
10998 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
10999 (__v8si) __B, __imm,
11000 (__v8si) __W,
11001 (__mmask8) __U);
11004 extern __inline __m256i
11005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11006 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11007 const int __imm)
11009 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11010 (__v8si) __B, __imm,
11011 (__v8si)
11012 _mm256_setzero_si256 (),
11013 (__mmask8) __U);
11016 extern __inline __m256i
11017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11018 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11020 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11021 (__v4di) __B, __imm,
11022 (__v4di)
11023 _mm256_setzero_si256 (),
11024 (__mmask8) -1);
11027 extern __inline __m256i
11028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11029 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11030 __m256i __B, const int __imm)
11032 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11033 (__v4di) __B, __imm,
11034 (__v4di) __W,
11035 (__mmask8) __U);
11038 extern __inline __m256i
11039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11040 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11041 const int __imm)
11043 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11044 (__v4di) __B, __imm,
11045 (__v4di)
11046 _mm256_setzero_si256 (),
11047 (__mmask8) __U);
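/* Usage sketch (illustrative, not part of the header): VALIGND treats
   the two sources as one double-width buffer (__A supplies the high
   elements, __B the low ones) and extracts a window shifted right by
   __imm 32-bit elements; unlike PALIGNR it is not confined to 128-bit
   lanes.  Assumes -mavx512vl; the helper name is hypothetical.  */
static __inline __m256i
example_align3 (__m256i __hi, __m256i __lo)
{
  /* Eight consecutive dwords starting at element 3 of __lo.  */
  return _mm256_alignr_epi32 (__hi, __lo, 3);
}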
11050 extern __inline __m128i
11051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11052 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11053 const int __I)
11055 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11056 (__v8hi) __W,
11057 (__mmask8) __U);
11060 extern __inline __m128i
11061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11062 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11064 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11065 (__v8hi)
11066 _mm_setzero_hi (),
11067 (__mmask8) __U);
11070 extern __inline __m128i
11071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11072 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11073 const int __I)
11075 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11076 (__v8hi) __W,
11077 (__mmask8) __U);
11080 extern __inline __m128i
11081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11084 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11085 (__v8hi)
11086 _mm_setzero_hi (),
11087 (__mmask8) __U);
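/* Usage sketch (illustrative, not part of the header): these are the
   write-masked forms of VCVTPS2PH; the immediate selects the rounding
   mode (0 = nearest even, 4 = use MXCSR).  Assumes -mavx512vl; the
   helper name is hypothetical.  */
static __inline __m128i
example_to_half (__m128i __old, __mmask8 __k, __m256 __x)
{
  /* Convert eight floats to half precision, rounding to nearest even;
     lanes cleared in __k keep __old.  */
  return _mm256_mask_cvtps_ph (__old, __k, __x, 0);
}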
11090 extern __inline __m256i
11091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11092 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11093 const int __imm)
11095 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11096 (__v8si) __W,
11097 (__mmask8) __U);
11100 extern __inline __m256i
11101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11102 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11104 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11105 (__v8si)
11106 _mm256_setzero_si256 (),
11107 (__mmask8) __U);
11110 extern __inline __m128i
11111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11112 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11113 const int __imm)
11115 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11116 (__v4si) __W,
11117 (__mmask8) __U);
11120 extern __inline __m128i
11121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11122 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11124 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11125 (__v4si)
11126 _mm_setzero_si128 (),
11127 (__mmask8) __U);
11130 extern __inline __m256i
11131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11132 _mm256_srai_epi64 (__m256i __A, const int __imm)
11134 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11135 (__v4di)
11136 _mm256_setzero_si256 (),
11137 (__mmask8) -1);
11140 extern __inline __m256i
11141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11142 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11143 const int __imm)
11145 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11146 (__v4di) __W,
11147 (__mmask8) __U);
11150 extern __inline __m256i
11151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11152 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11154 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11155 (__v4di)
11156 _mm256_setzero_si256 (),
11157 (__mmask8) __U);
11160 extern __inline __m128i
11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11162 _mm_srai_epi64 (__m128i __A, const int __imm)
11164 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11165 (__v2di)
11166 _mm_setzero_di (),
11167 (__mmask8) -1);
11170 extern __inline __m128i
11171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11172 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11173 const int __imm)
11175 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11176 (__v2di) __W,
11177 (__mmask8) __U);
11180 extern __inline __m128i
11181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11182 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11184 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11185 (__v2di)
11186 _mm_setzero_si128 (),
11187 (__mmask8) __U);
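/* Usage sketch (illustrative, not part of the header): AVX-512 adds a
   64-bit arithmetic right shift (VPSRAQ); earlier ISAs only provide
   logical shifts for 64-bit lanes.  Assumes -mavx512vl; the helper
   name is hypothetical.  */
static __inline __m256i
example_sign_mask (__m256i __x)
{
  /* All-ones for negative lanes, all-zeros otherwise.  */
  return _mm256_srai_epi64 (__x, 63);
}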
11190 extern __inline __m128i
11191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11192 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11194 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11195 (__v4si) __W,
11196 (__mmask8) __U);
11199 extern __inline __m128i
11200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11201 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11203 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11204 (__v4si)
11205 _mm_setzero_si128 (),
11206 (__mmask8) __U);
11209 extern __inline __m128i
11210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11211 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11213 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11214 (__v2di) __W,
11215 (__mmask8) __U);
11218 extern __inline __m128i
11219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11220 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11222 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11223 (__v2di)
11224 _mm_setzero_di (),
11225 (__mmask8) __U);
11228 extern __inline __m256i
11229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11230 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11231 int __B)
11233 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11234 (__v8si) __W,
11235 (__mmask8) __U);
11238 extern __inline __m256i
11239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11240 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11242 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11243 (__v8si)
11244 _mm256_setzero_si256 (),
11245 (__mmask8) __U);
11248 extern __inline __m256i
11249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11250 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11251 int __B)
11253 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11254 (__v4di) __W,
11255 (__mmask8) __U);
11258 extern __inline __m256i
11259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11260 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11262 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11263 (__v4di)
11264 _mm256_setzero_si256 (),
11265 (__mmask8) __U);
11268 extern __inline __m256d
11269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11270 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11271 const int __imm)
11273 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11274 (__v4df) __W,
11275 (__mmask8) __U);
11278 extern __inline __m256d
11279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11282 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11283 (__v4df)
11284 _mm256_setzero_pd (),
11285 (__mmask8) __U);
11288 extern __inline __m256d
11289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11291 const int __C)
11293 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11294 (__v4df) __W,
11295 (__mmask8) __U);
11298 extern __inline __m256d
11299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11300 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11302 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11303 (__v4df)
11304 _mm256_setzero_pd (),
11305 (__mmask8) __U);
11308 extern __inline __m128d
11309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11310 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11311 const int __C)
11313 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11314 (__v2df) __W,
11315 (__mmask8) __U);
11318 extern __inline __m128d
11319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11320 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11322 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11323 (__v2df)
11324 _mm_setzero_pd (),
11325 (__mmask8) __U);
11328 extern __inline __m256
11329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11330 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11331 const int __C)
11333 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11334 (__v8sf) __W,
11335 (__mmask8) __U);
11338 extern __inline __m256
11339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11340 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11342 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11343 (__v8sf)
11344 _mm256_setzero_ps (),
11345 (__mmask8) __U);
11348 extern __inline __m128
11349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11350 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11351 const int __C)
11353 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11354 (__v4sf) __W,
11355 (__mmask8) __U);
11358 extern __inline __m128
11359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11360 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11362 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11363 (__v4sf)
11364 _mm_setzero_ps (),
11365 (__mmask8) __U);
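/* Usage sketch (illustrative, not part of the header): permutex_pd
   (VPERMPD) reorders doubles across the whole 256-bit register, while
   permute_pd/_ps (VPERMILPD/VPERMILPS) only shuffle within each
   128-bit lane.  Assumes -mavx512vl; the helper name is hypothetical.  */
static __inline __m256d
example_reverse_sel (__m256d __old, __mmask8 __k, __m256d __x)
{
  /* Control 0x1B = 0b00011011 reverses the four doubles; only lanes
     set in __k are written, the rest keep __old.  */
  return _mm256_mask_permutex_pd (__old, __k, __x, 0x1B);
}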
11368 extern __inline __m256d
11369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11370 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11372 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11373 (__v4df) __W,
11374 (__mmask8) __U);
11377 extern __inline __m256
11378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11379 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11381 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11382 (__v8sf) __W,
11383 (__mmask8) __U);
11386 extern __inline __m256i
11387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11388 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11390 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11391 (__v4di) __W,
11392 (__mmask8) __U);
11395 extern __inline __m256i
11396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11399 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11400 (__v8si) __W,
11401 (__mmask8) __U);
11404 extern __inline __m128d
11405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11406 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11408 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11409 (__v2df) __W,
11410 (__mmask8) __U);
11413 extern __inline __m128
11414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11415 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11417 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11418 (__v4sf) __W,
11419 (__mmask8) __U);
11422 extern __inline __m128i
11423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11424 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11426 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11427 (__v2di) __W,
11428 (__mmask8) __U);
11431 extern __inline __m128i
11432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11435 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11436 (__v4si) __W,
11437 (__mmask8) __U);
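/* Usage sketch (illustrative, not part of the header): mask_blend is a
   pure per-lane select -- lanes whose mask bit is set take the second
   vector operand (__W), the rest take the first.  Assumes -mavx512vl;
   the helper name is hypothetical.  */
static __inline __m256d
example_select (__mmask8 __k, __m256d __a, __m256d __b)
{
  /* result[i] = ((__k >> i) & 1) ? __b[i] : __a[i]  */
  return _mm256_mask_blend_pd (__k, __a, __b);
}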
11440 extern __inline __mmask8
11441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11442 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11444 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11445 (__v4di) __Y, __P,
11446 (__mmask8) -1);
11449 extern __inline __mmask8
11450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11451 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11453 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11454 (__v8si) __Y, __P,
11455 (__mmask8) -1);
11458 extern __inline __mmask8
11459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11460 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11462 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11463 (__v4di) __Y, __P,
11464 (__mmask8) -1);
11467 extern __inline __mmask8
11468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11469 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11471 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11472 (__v8si) __Y, __P,
11473 (__mmask8) -1);
11476 extern __inline __mmask8
11477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11478 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11480 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11481 (__v4df) __Y, __P,
11482 (__mmask8) -1);
11485 extern __inline __mmask8
11486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11487 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11489 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11490 (__v8sf) __Y, __P,
11491 (__mmask8) -1);
11494 extern __inline __mmask8
11495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11496 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11497 const int __P)
11499 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11500 (__v4di) __Y, __P,
11501 (__mmask8) __U);
11504 extern __inline __mmask8
11505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11506 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11507 const int __P)
11509 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11510 (__v8si) __Y, __P,
11511 (__mmask8) __U);
11514 extern __inline __mmask8
11515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11516 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11517 const int __P)
11519 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11520 (__v4di) __Y, __P,
11521 (__mmask8) __U);
11524 extern __inline __mmask8
11525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11526 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11527 const int __P)
11529 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11530 (__v8si) __Y, __P,
11531 (__mmask8) __U);
11534 extern __inline __mmask8
11535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11536 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11537 const int __P)
11539 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11540 (__v4df) __Y, __P,
11541 (__mmask8) __U);
11544 extern __inline __mmask8
11545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11546 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11547 const int __P)
11549 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11550 (__v8sf) __Y, __P,
11551 (__mmask8) __U);
11554 extern __inline __mmask8
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11558 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11559 (__v2di) __Y, __P,
11560 (__mmask8) -1);
11563 extern __inline __mmask8
11564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11567 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11568 (__v4si) __Y, __P,
11569 (__mmask8) -1);
11572 extern __inline __mmask8
11573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11574 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11576 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11577 (__v2di) __Y, __P,
11578 (__mmask8) -1);
11581 extern __inline __mmask8
11582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11583 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11585 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11586 (__v4si) __Y, __P,
11587 (__mmask8) -1);
11590 extern __inline __mmask8
11591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11592 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11594 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11595 (__v2df) __Y, __P,
11596 (__mmask8) -1);
11599 extern __inline __mmask8
11600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11601 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11603 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11604 (__v4sf) __Y, __P,
11605 (__mmask8) -1);
11608 extern __inline __mmask8
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11611 const int __P)
11613 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11614 (__v2di) __Y, __P,
11615 (__mmask8) __U);
11618 extern __inline __mmask8
11619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11621 const int __P)
11623 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11624 (__v4si) __Y, __P,
11625 (__mmask8) __U);
11628 extern __inline __mmask8
11629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11630 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11631 const int __P)
11633 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11634 (__v2di) __Y, __P,
11635 (__mmask8) __U);
11638 extern __inline __mmask8
11639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11640 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11641 const int __P)
11643 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11644 (__v4si) __Y, __P,
11645 (__mmask8) __U);
11648 extern __inline __mmask8
11649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11650 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11651 const int __P)
11653 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11654 (__v2df) __Y, __P,
11655 (__mmask8) __U);
11658 extern __inline __mmask8
11659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11660 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11661 const int __P)
11663 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11664 (__v4sf) __Y, __P,
11665 (__mmask8) __U);
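/* The compare intrinsics above take the predicate operand __P in the
   AVX-512 encoding: 0 EQ, 1 LT, 2 LE, 4 NE, 5 NLT (>=), 6 NLE (>); the
   epu forms compare unsigned via the ucmp builtins.  A minimal usage
   sketch, assuming the masked add intrinsic defined elsewhere in this
   header:

       __mmask8 m = _mm256_cmp_epu32_mask (a, b, 6);
       __m256i  r = _mm256_mask_add_epi32 (src, m, a, b);

   Here m has a bit set for each lane where a > b (unsigned), and only
   those lanes of r receive a + b; the remaining lanes are copied from
   src.  */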
11668 extern __inline __m256d
11669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11670 _mm256_permutex_pd (__m256d __X, const int __M)
11672 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11673 (__v4df)
11674 _mm256_undefined_pd (),
11675 (__mmask8) -1);
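/* The cmpneq/cmplt/cmpge/cmple helpers below are thin wrappers around
   the same compare builtins with the predicate hard-coded (4 NE, 1 LT,
   5 NLT, 2 LE) and an all-ones mask.  */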
11678 extern __inline __mmask8
11679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11680 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11682 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11683 (__v8si) __Y, 4,
11684 (__mmask8) - 1);
11687 extern __inline __mmask8
11688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11689 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11691 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11692 (__v8si) __Y, 1,
11693 (__mmask8) - 1);
11696 extern __inline __mmask8
11697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11698 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11700 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11701 (__v8si) __Y, 5,
11702 (__mmask8) - 1);
11705 extern __inline __mmask8
11706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11709 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11710 (__v8si) __Y, 2,
11711 (__mmask8) - 1);
11714 extern __inline __mmask8
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11718 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11719 (__v4di) __Y, 4,
11720 (__mmask8) - 1);
11723 extern __inline __mmask8
11724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11725 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11727 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11728 (__v4di) __Y, 1,
11729 (__mmask8) - 1);
11732 extern __inline __mmask8
11733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11734 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11736 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11737 (__v4di) __Y, 5,
11738 (__mmask8) - 1);
11741 extern __inline __mmask8
11742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11743 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11745 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11746 (__v4di) __Y, 2,
11747 (__mmask8) - 1);
11750 extern __inline __mmask8
11751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11752 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11754 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11755 (__v8si) __Y, 4,
11756 (__mmask8) - 1);
11759 extern __inline __mmask8
11760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11761 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11763 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11764 (__v8si) __Y, 1,
11765 (__mmask8) - 1);
11768 extern __inline __mmask8
11769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11770 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
11772 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11773 (__v8si) __Y, 5,
11774 (__mmask8) - 1);
11777 extern __inline __mmask8
11778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11779 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
11781 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11782 (__v8si) __Y, 2,
11783 (__mmask8) - 1);
11786 extern __inline __mmask8
11787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11788 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
11790 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11791 (__v4di) __Y, 4,
11792 (__mmask8) - 1);
11795 extern __inline __mmask8
11796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11797 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
11799 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11800 (__v4di) __Y, 1,
11801 (__mmask8) - 1);
11804 extern __inline __mmask8
11805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11806 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
11808 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11809 (__v4di) __Y, 5,
11810 (__mmask8) - 1);
11813 extern __inline __mmask8
11814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11815 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
11817 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11818 (__v4di) __Y, 2,
11819 (__mmask8) - 1);
11822 extern __inline __mmask8
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
11826 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11827 (__v4si) __Y, 4,
11828 (__mmask8) - 1);
11831 extern __inline __mmask8
11832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11833 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
11835 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11836 (__v4si) __Y, 1,
11837 (__mmask8) - 1);
11840 extern __inline __mmask8
11841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11842 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
11844 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11845 (__v4si) __Y, 5,
11846 (__mmask8) - 1);
11849 extern __inline __mmask8
11850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11851 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
11853 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11854 (__v4si) __Y, 2,
11855 (__mmask8) - 1);
11858 extern __inline __mmask8
11859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11860 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
11862 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11863 (__v2di) __Y, 4,
11864 (__mmask8) - 1);
11867 extern __inline __mmask8
11868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11869 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
11871 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11872 (__v2di) __Y, 1,
11873 (__mmask8) - 1);
11876 extern __inline __mmask8
11877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
11880 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11881 (__v2di) __Y, 5,
11882 (__mmask8) - 1);
11885 extern __inline __mmask8
11886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11887 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
11889 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11890 (__v2di) __Y, 2,
11891 (__mmask8) - 1);
11894 extern __inline __mmask8
11895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11896 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
11898 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11899 (__v4si) __Y, 4,
11900 (__mmask8) - 1);
11903 extern __inline __mmask8
11904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11905 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
11907 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11908 (__v4si) __Y, 1,
11909 (__mmask8) - 1);
11912 extern __inline __mmask8
11913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11914 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
11916 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11917 (__v4si) __Y, 5,
11918 (__mmask8) - 1);
11921 extern __inline __mmask8
11922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11923 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
11925 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11926 (__v4si) __Y, 2,
11927 (__mmask8) - 1);
11930 extern __inline __mmask8
11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
11934 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11935 (__v2di) __Y, 4,
11936 (__mmask8) - 1);
11939 extern __inline __mmask8
11940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11941 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
11943 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11944 (__v2di) __Y, 1,
11945 (__mmask8) - 1);
11948 extern __inline __mmask8
11949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11950 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
11952 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11953 (__v2di) __Y, 5,
11954 (__mmask8) - 1);
11957 extern __inline __mmask8
11958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11959 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
11961 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11962 (__v2di) __Y, 2,
11963 (__mmask8) - 1);
11966 #else
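/* In this branch the immediate-operand intrinsics are provided as macros
   expanding directly to the builtins, so the constant arguments reach the
   builtin unchanged; presumably this is the counterpart of an
   __OPTIMIZE__ conditional guarding the inline definitions above.  */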
11967 #define _mm256_permutex_pd(X, M) \
11968 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
11969 (__v4df)(__m256d)_mm256_undefined_pd(),\
11970 (__mmask8)-1))
11972 #define _mm256_maskz_permutex_epi64(M, X, I) \
11973 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
11974 (int)(I), \
11975 (__v4di)(__m256i) \
11976 (_mm256_setzero_si256()),\
11977 (__mmask8)(M)))
11979 #define _mm256_mask_permutex_epi64(W, M, X, I) \
11980 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
11981 (int)(I), \
11982 (__v4di)(__m256i)(W), \
11983 (__mmask8)(M)))
11985 #define _mm256_insertf32x4(X, Y, C) \
11986 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
11987 (__v4sf)(__m128) (Y), (int) (C), \
11988 (__v8sf)(__m256)_mm256_setzero_ps(), \
11989 (__mmask8)-1))
11991 #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
11992 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
11993 (__v4sf)(__m128) (Y), (int) (C), \
11994 (__v8sf)(__m256)(W), \
11995 (__mmask8)(U)))
11997 #define _mm256_maskz_insertf32x4(U, X, Y, C) \
11998 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
11999 (__v4sf)(__m128) (Y), (int) (C), \
12000 (__v8sf)(__m256)_mm256_setzero_ps(), \
12001 (__mmask8)(U)))
12003 #define _mm256_inserti32x4(X, Y, C) \
12004 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12005 (__v4si)(__m128i) (Y), (int) (C), \
12006 (__v8si)(__m256i)_mm256_setzero_si256(), \
12007 (__mmask8)-1))
12009 #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
12010 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12011 (__v4si)(__m128i) (Y), (int) (C), \
12012 (__v8si)(__m256i)(W), \
12013 (__mmask8)(U)))
12015 #define _mm256_maskz_inserti32x4(U, X, Y, C) \
12016 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12017 (__v4si)(__m128i) (Y), (int) (C), \
12018 (__v8si)(__m256i)_mm256_setzero_si256(), \
12019 (__mmask8)(U)))
12021 #define _mm256_extractf32x4_ps(X, C) \
12022 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12023 (int) (C), \
12024 (__v4sf)(__m128)_mm_setzero_ps(), \
12025 (__mmask8)-1))
12027 #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
12028 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12029 (int) (C), \
12030 (__v4sf)(__m128)(W), \
12031 (__mmask8)(U)))
12033 #define _mm256_maskz_extractf32x4_ps(U, X, C) \
12034 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12035 (int) (C), \
12036 (__v4sf)(__m128)_mm_setzero_ps(), \
12037 (__mmask8)(U)))
12039 #define _mm256_extracti32x4_epi32(X, C) \
12040 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12041 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
12043 #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
12044 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12045 (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12047 #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
12048 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12049 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
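/* For the 256-bit insert/extract macros above, the immediate C selects
   the 128-bit half that is read or written: 0 for the low half, 1 for
   the high half.  A sketch using the macros defined above:

       __m128 lo = _mm256_extractf32x4_ps (v, 0);
       __m256 sw = _mm256_insertf32x4 (v, lo, 1);

   sw ends up with the low half of v duplicated into its high half.  */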
12051 #define _mm256_shuffle_i64x2(X, Y, C) \
12052 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12053 (__v4di)(__m256i)(Y), (int)(C), \
12054 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12055 (__mmask8)-1))
12057 #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
12058 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12059 (__v4di)(__m256i)(Y), (int)(C), \
12060 (__v4di)(__m256i)(W),\
12061 (__mmask8)(U)))
12063 #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
12064 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12065 (__v4di)(__m256i)(Y), (int)(C), \
12066 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12067 (__mmask8)(U)))
12069 #define _mm256_shuffle_i32x4(X, Y, C) \
12070 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12071 (__v8si)(__m256i)(Y), (int)(C), \
12072 (__v8si)(__m256i)_mm256_setzero_si256(), \
12073 (__mmask8)-1))
12075 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
12076 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12077 (__v8si)(__m256i)(Y), (int)(C), \
12078 (__v8si)(__m256i)(W), \
12079 (__mmask8)(U)))
12081 #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
12082 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12083 (__v8si)(__m256i)(Y), (int)(C), \
12084 (__v8si)(__m256i)_mm256_setzero_si256(), \
12085 (__mmask8)(U)))
12087 #define _mm256_shuffle_f64x2(X, Y, C) \
12088 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12089 (__v4df)(__m256d)(Y), (int)(C), \
12090 (__v4df)(__m256d)_mm256_setzero_pd(), \
12091 (__mmask8)-1))
12093 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
12094 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12095 (__v4df)(__m256d)(Y), (int)(C), \
12096 (__v4df)(__m256d)(W), \
12097 (__mmask8)(U)))
12099 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
12100 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12101 (__v4df)(__m256d)(Y), (int)(C), \
12102 (__v4df)(__m256d)_mm256_setzero_pd(), \
12103 (__mmask8)(U)))
12105 #define _mm256_shuffle_f32x4(X, Y, C) \
12106 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12107 (__v8sf)(__m256)(Y), (int)(C), \
12108 (__v8sf)(__m256)_mm256_setzero_ps(), \
12109 (__mmask8)-1))
12111 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
12112 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12113 (__v8sf)(__m256)(Y), (int)(C), \
12114 (__v8sf)(__m256)(W), \
12115 (__mmask8)(U)))
12117 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
12118 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12119 (__v8sf)(__m256)(Y), (int)(C), \
12120 (__v8sf)(__m256)_mm256_setzero_ps(), \
12121 (__mmask8)(U)))
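/* The shuffle_{i,f}{32x4,64x2} macros above move whole 128-bit chunks:
   bit 0 of the immediate selects which half of the first source becomes
   the low half of the result, bit 1 which half of the second source
   becomes the high half.  */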
12123 #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
12124 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12125 (__v4df)(__m256d)(B), (int)(C), \
12126 (__v4df)(__m256d)(W), \
12127 (__mmask8)(U)))
12129 #define _mm256_maskz_shuffle_pd(U, A, B, C) \
12130 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12131 (__v4df)(__m256d)(B), (int)(C), \
12132 (__v4df)(__m256d)_mm256_setzero_pd(),\
12133 (__mmask8)(U)))
12135 #define _mm_mask_shuffle_pd(W, U, A, B, C) \
12136 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12137 (__v2df)(__m128d)(B), (int)(C), \
12138 (__v2df)(__m128d)(W), \
12139 (__mmask8)(U)))
12141 #define _mm_maskz_shuffle_pd(U, A, B, C) \
12142 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12143 (__v2df)(__m128d)(B), (int)(C), \
12144 (__v2df)(__m128d)_mm_setzero_pd(), \
12145 (__mmask8)(U)))
12147 #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
12148 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12149 (__v8sf)(__m256)(B), (int)(C), \
12150 (__v8sf)(__m256)(W), \
12151 (__mmask8)(U)))
12153 #define _mm256_maskz_shuffle_ps(U, A, B, C) \
12154 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12155 (__v8sf)(__m256)(B), (int)(C), \
12156 (__v8sf)(__m256)_mm256_setzero_ps(), \
12157 (__mmask8)(U)))
12159 #define _mm_mask_shuffle_ps(W, U, A, B, C) \
12160 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12161 (__v4sf)(__m128)(B), (int)(C), \
12162 (__v4sf)(__m128)(W), \
12163 (__mmask8)(U)))
12165 #define _mm_maskz_shuffle_ps(U, A, B, C) \
12166 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12167 (__v4sf)(__m128)(B), (int)(C), \
12168 (__v4sf)(__m128)_mm_setzero_ps(), \
12169 (__mmask8)(U)))
12171 #define _mm256_fixupimm_pd(X, Y, Z, C) \
12172 ((__m256d)__builtin_ia32_fixupimmpd256 ((__v4df)(__m256d)(X), \
12173 (__v4df)(__m256d)(Y), \
12174 (__v4di)(__m256i)(Z), (int)(C)))
12176 #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
12177 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12178 (__v4df)(__m256d)(Y), \
12179 (__v4di)(__m256i)(Z), (int)(C), \
12180 (__mmask8)(U)))
12182 #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
12183 ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
12184 (__v4df)(__m256d)(Y), \
12185 (__v4di)(__m256i)(Z), (int)(C),\
12186 (__mmask8)(U)))
12188 #define _mm256_fixupimm_ps(X, Y, Z, C) \
12189 ((__m256)__builtin_ia32_fixupimmps256 ((__v8sf)(__m256)(X), \
12190 (__v8sf)(__m256)(Y), \
12191 (__v8si)(__m256i)(Z), (int)(C)))
12193 #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
12194 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12195 (__v8sf)(__m256)(Y), \
12196 (__v8si)(__m256i)(Z), (int)(C), \
12197 (__mmask8)(U)))
12199 #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
12200 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
12201 (__v8sf)(__m256)(Y), \
12202 (__v8si)(__m256i)(Z), (int)(C),\
12203 (__mmask8)(U)))
12205 #define _mm_fixupimm_pd(X, Y, Z, C) \
12206 ((__m128d)__builtin_ia32_fixupimmpd128 ((__v2df)(__m128d)(X), \
12207 (__v2df)(__m128d)(Y), \
12208 (__v2di)(__m128i)(Z), (int)(C)))
12210 #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
12211 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12212 (__v2df)(__m128d)(Y), \
12213 (__v2di)(__m128i)(Z), (int)(C), \
12214 (__mmask8)(U)))
12216 #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
12217 ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
12218 (__v2df)(__m128d)(Y), \
12219 (__v2di)(__m128i)(Z), (int)(C),\
12220 (__mmask8)(U)))
12222 #define _mm_fixupimm_ps(X, Y, Z, C) \
12223 ((__m128)__builtin_ia32_fixupimmps128 ((__v4sf)(__m128)(X), \
12224 (__v4sf)(__m128)(Y), \
12225 (__v4si)(__m128i)(Z), (int)(C)))
12227 #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
12228 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12229 (__v4sf)(__m128)(Y), \
12230 (__v4si)(__m128i)(Z), (int)(C),\
12231 (__mmask8)(U)))
12233 #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
12234 ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
12235 (__v4sf)(__m128)(Y), \
12236 (__v4si)(__m128i)(Z), (int)(C),\
12237 (__mmask8)(U)))
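/* The fixupimm macros wrap VFIXUPIMMPD/VFIXUPIMMPS: each element is
   fixed up according to a per-element token looked up in the integer
   table operand Z, further qualified by the immediate C; see the
   instruction documentation for the exact token encoding.  */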
12239 #define _mm256_mask_srli_epi32(W, U, A, B) \
12240 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12241 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
12243 #define _mm256_maskz_srli_epi32(U, A, B) \
12244 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12245 (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
12247 #define _mm_mask_srli_epi32(W, U, A, B) \
12248 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12249 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12251 #define _mm_maskz_srli_epi32(U, A, B) \
12252 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12253 (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
12255 #define _mm256_mask_srli_epi64(W, U, A, B) \
12256 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12257 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
12259 #define _mm256_maskz_srli_epi64(U, A, B) \
12260 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12261 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
12263 #define _mm_mask_srli_epi64(W, U, A, B) \
12264 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12265 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
12267 #define _mm_maskz_srli_epi64(U, A, B) \
12268 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12269 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
12271 #define _mm256_mask_slli_epi32(W, U, X, C) \
12272 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12273 (__v8si)(__m256i)(W),\
12274 (__mmask8)(U)))
12276 #define _mm256_maskz_slli_epi32(U, X, C) \
12277 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12278 (__v8si)(__m256i)_mm256_setzero_si256(),\
12279 (__mmask8)(U)))
12281 #define _mm256_mask_slli_epi64(W, U, X, C) \
12282 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12283 (__v4di)(__m256i)(W),\
12284 (__mmask8)(U)))
12286 #define _mm256_maskz_slli_epi64(U, X, C) \
12287 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12288 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12289 (__mmask8)(U)))
12291 #define _mm_mask_slli_epi32(W, U, X, C) \
12292 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12293 (__v4si)(__m128i)(W),\
12294 (__mmask8)(U)))
12296 #define _mm_maskz_slli_epi32(U, X, C) \
12297 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12298 (__v4si)(__m128i)_mm_setzero_si128 (),\
12299 (__mmask8)(U)))
12301 #define _mm_mask_slli_epi64(W, U, X, C) \
12302 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12303 (__v2di)(__m128i)(W),\
12304 (__mmask8)(U)))
12306 #define _mm_maskz_slli_epi64(U, X, C) \
12307 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12308 (__v2di)(__m128i)_mm_setzero_di(),\
12309 (__mmask8)(U)))
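/* The masked immediate-shift macros follow the usual convention: the
   mask_ forms merge unselected lanes from W, the maskz_ forms zero them.
   For example, a sketch:

       __m256i r = _mm256_maskz_slli_epi32 (m, x, 4);

   shifts every selected lane of x left by four bits and zeroes the
   lanes whose mask bit in m is clear.  */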
12311 #define _mm256_ternarylogic_epi64(A, B, C, I) \
12312 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12313 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
12315 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
12316 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12317 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12319 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
12320 ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
12321 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12323 #define _mm256_ternarylogic_epi32(A, B, C, I) \
12324 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12325 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
12327 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
12328 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12329 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12331 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
12332 ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
12333 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12335 #define _mm_ternarylogic_epi64(A, B, C, I) \
12336 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12337 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
12339 #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
12340 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12341 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12343 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
12344 ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
12345 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12347 #define _mm_ternarylogic_epi32(A, B, C, I) \
12348 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12349 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
12351 #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
12352 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12353 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
12355 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
12356 ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
12357 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
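/* The ternarylogic immediate I is an eight-entry truth table indexed by
   the bit triple taken from A (weight 4), B (weight 2) and C (weight 1),
   so any three-input boolean function can be encoded by evaluating it on
   the constants A=0xF0, B=0xCC, C=0xAA.  For instance 0x96 is A ^ B ^ C
   and 0xE8 is the bitwise majority of the three inputs:

       __m256i x3 = _mm256_ternarylogic_epi32 (a, b, c, 0x96);  */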
12359 #define _mm256_roundscale_ps(A, B) \
12360 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12361 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
12363 #define _mm256_mask_roundscale_ps(W, U, A, B) \
12364 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12365 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
12367 #define _mm256_maskz_roundscale_ps(U, A, B) \
12368 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12369 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
12371 #define _mm256_roundscale_pd(A, B) \
12372 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12373 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
12375 #define _mm256_mask_roundscale_pd(W, U, A, B) \
12376 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12377 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
12379 #define _mm256_maskz_roundscale_pd(U, A, B) \
12380 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12381 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
12383 #define _mm_roundscale_ps(A, B) \
12384 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12385 (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
12387 #define _mm_mask_roundscale_ps(W, U, A, B) \
12388 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12389 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
12391 #define _mm_maskz_roundscale_ps(U, A, B) \
12392 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12393 (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
12395 #define _mm_roundscale_pd(A, B) \
12396 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12397 (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
12399 #define _mm_mask_roundscale_pd(W, U, A, B) \
12400 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12401 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
12403 #define _mm_maskz_roundscale_pd(U, A, B) \
12404 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12405 (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
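/* For the roundscale macros the immediate packs, roughly, the rounding
   control in its low nibble and the number of fraction bits to keep in
   its high nibble; the ceil-style convenience wrappers at the end of
   this header simply pass a fixed _MM_FROUND_* constant.  */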
12407 #define _mm256_getmant_ps(X, B, C) \
12408 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12409 (int)(((C)<<2) | (B)), \
12410 (__v8sf)(__m256)_mm256_setzero_ps(), \
12411 (__mmask8)-1))
12413 #define _mm256_mask_getmant_ps(W, U, X, B, C) \
12414 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12415 (int)(((C)<<2) | (B)), \
12416 (__v8sf)(__m256)(W), \
12417 (__mmask8)(U)))
12419 #define _mm256_maskz_getmant_ps(U, X, B, C) \
12420 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12421 (int)(((C)<<2) | (B)), \
12422 (__v8sf)(__m256)_mm256_setzero_ps(), \
12423 (__mmask8)(U)))
12425 #define _mm_getmant_ps(X, B, C) \
12426 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12427 (int)(((C)<<2) | (B)), \
12428 (__v4sf)(__m128)_mm_setzero_ps(), \
12429 (__mmask8)-1))
12431 #define _mm_mask_getmant_ps(W, U, X, B, C) \
12432 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12433 (int)(((C)<<2) | (B)), \
12434 (__v4sf)(__m128)(W), \
12435 (__mmask8)(U)))
12437 #define _mm_maskz_getmant_ps(U, X, B, C) \
12438 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12439 (int)(((C)<<2) | (B)), \
12440 (__v4sf)(__m128)_mm_setzero_ps(), \
12441 (__mmask8)(U)))
12443 #define _mm256_getmant_pd(X, B, C) \
12444 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12445 (int)(((C)<<2) | (B)), \
12446 (__v4df)(__m256d)_mm256_setzero_pd(), \
12447 (__mmask8)-1))
12449 #define _mm256_mask_getmant_pd(W, U, X, B, C) \
12450 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12451 (int)(((C)<<2) | (B)), \
12452 (__v4df)(__m256d)(W), \
12453 (__mmask8)(U)))
12455 #define _mm256_maskz_getmant_pd(U, X, B, C) \
12456 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12457 (int)(((C)<<2) | (B)), \
12458 (__v4df)(__m256d)_mm256_setzero_pd(), \
12459 (__mmask8)(U)))
12461 #define _mm_getmant_pd(X, B, C) \
12462 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12463 (int)(((C)<<2) | (B)), \
12464 (__v2df)(__m128d)_mm_setzero_pd(), \
12465 (__mmask8)-1))
12467 #define _mm_mask_getmant_pd(W, U, X, B, C) \
12468 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12469 (int)(((C)<<2) | (B)), \
12470 (__v2df)(__m128d)(W), \
12471 (__mmask8)(U)))
12473 #define _mm_maskz_getmant_pd(U, X, B, C) \
12474 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12475 (int)(((C)<<2) | (B)), \
12476 (__v2df)(__m128d)_mm_setzero_pd(), \
12477 (__mmask8)(U)))
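/* The getmant macros combine their two immediate arguments as
   (C << 2) | B, where B selects the normalization interval (an
   _MM_MANT_NORM_* value) and C the sign control (an _MM_MANT_SIGN_*
   value).  */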
12479 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12480 (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD, \
12481 (float const *)ADDR, \
12482 (__v8si)(__m256i)INDEX, \
12483 (__mmask8)MASK, (int)SCALE)
12485 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12486 (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD, \
12487 (float const *)ADDR, \
12488 (__v4si)(__m128i)INDEX, \
12489 (__mmask8)MASK, (int)SCALE)
12491 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12492 (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD, \
12493 (double const *)ADDR, \
12494 (__v4si)(__m128i)INDEX, \
12495 (__mmask8)MASK, (int)SCALE)
12497 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12498 (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD, \
12499 (double const *)ADDR, \
12500 (__v4si)(__m128i)INDEX, \
12501 (__mmask8)MASK, (int)SCALE)
12503 #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12504 (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD, \
12505 (float const *)ADDR, \
12506 (__v4di)(__m256i)INDEX, \
12507 (__mmask8)MASK, (int)SCALE)
12509 #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12510 (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD, \
12511 (float const *)ADDR, \
12512 (__v2di)(__m128i)INDEX, \
12513 (__mmask8)MASK, (int)SCALE)
12515 #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12516 (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD, \
12517 (double const *)ADDR, \
12518 (__v4di)(__m256i)INDEX, \
12519 (__mmask8)MASK, (int)SCALE)
12521 #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12522 (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD, \
12523 (double const *)ADDR, \
12524 (__v2di)(__m128i)INDEX, \
12525 (__mmask8)MASK, (int)SCALE)
12527 #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12528 (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD, \
12529 (int const *)ADDR, \
12530 (__v8si)(__m256i)INDEX, \
12531 (__mmask8)MASK, (int)SCALE)
12533 #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12534 (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD, \
12535 (int const *)ADDR, \
12536 (__v4si)(__m128i)INDEX, \
12537 (__mmask8)MASK, (int)SCALE)
12539 #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12540 (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD, \
12541 (long long const *)ADDR, \
12542 (__v4si)(__m128i)INDEX, \
12543 (__mmask8)MASK, (int)SCALE)
12545 #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12546 (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD, \
12547 (long long const *)ADDR, \
12548 (__v4si)(__m128i)INDEX, \
12549 (__mmask8)MASK, (int)SCALE)
12551 #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12552 (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD, \
12553 (int const *)ADDR, \
12554 (__v4di)(__m256i)INDEX, \
12555 (__mmask8)MASK, (int)SCALE)
12557 #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12558 (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD, \
12559 (int const *)ADDR, \
12560 (__v2di)(__m128i)INDEX, \
12561 (__mmask8)MASK, (int)SCALE)
12563 #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12564 (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD, \
12565 (long long const *)ADDR, \
12566 (__v4di)(__m256i)INDEX, \
12567 (__mmask8)MASK, (int)SCALE)
12569 #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12570 (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD, \
12571 (long long const *)ADDR, \
12572 (__v2di)(__m128i)INDEX, \
12573 (__mmask8)MASK, (int)SCALE)
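/* The mmask gather macros are merging gathers: element i is loaded from
   ADDR + INDEX[i] * SCALE when bit i of MASK is set, and kept from
   V1OLD otherwise.  A sketch:

       __m256d d = _mm256_mmask_i32gather_pd (old, 0x5, idx, base, 8);

   loads elements 0 and 2 from base[idx[0]] and base[idx[2]] (indices
   scaled by 8 bytes) and copies elements 1 and 3 from old.  */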
12575 #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
12576 __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)0xFF, \
12577 (__v8si)(__m256i)INDEX, \
12578 (__v8sf)(__m256)V1, (int)SCALE)
12580 #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12581 __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)MASK, \
12582 (__v8si)(__m256i)INDEX, \
12583 (__v8sf)(__m256)V1, (int)SCALE)
12585 #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
12586 __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)0xFF, \
12587 (__v4si)(__m128i)INDEX, \
12588 (__v4sf)(__m128)V1, (int)SCALE)
12590 #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12591 __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)MASK, \
12592 (__v4si)(__m128i)INDEX, \
12593 (__v4sf)(__m128)V1, (int)SCALE)
12595 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
12596 __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)0xFF, \
12597 (__v4si)(__m128i)INDEX, \
12598 (__v4df)(__m256d)V1, (int)SCALE)
12600 #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
12601 __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)MASK, \
12602 (__v4si)(__m128i)INDEX, \
12603 (__v4df)(__m256d)V1, (int)SCALE)
12605 #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
12606 __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)0xFF, \
12607 (__v4si)(__m128i)INDEX, \
12608 (__v2df)(__m128d)V1, (int)SCALE)
12610 #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
12611 __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)MASK, \
12612 (__v4si)(__m128i)INDEX, \
12613 (__v2df)(__m128d)V1, (int)SCALE)
12615 #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
12616 __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)0xFF, \
12617 (__v4di)(__m256i)INDEX, \
12618 (__v4sf)(__m128)V1, (int)SCALE)
12620 #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12621 __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)MASK, \
12622 (__v4di)(__m256i)INDEX, \
12623 (__v4sf)(__m128)V1, (int)SCALE)
12625 #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
12626 __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)0xFF, \
12627 (__v2di)(__m128i)INDEX, \
12628 (__v4sf)(__m128)V1, (int)SCALE)
12630 #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12631 __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)MASK, \
12632 (__v2di)(__m128i)INDEX, \
12633 (__v4sf)(__m128)V1, (int)SCALE)
12635 #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
12636 __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)0xFF, \
12637 (__v4di)(__m256i)INDEX, \
12638 (__v4df)(__m256d)V1, (int)SCALE)
12640 #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
12641 __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)MASK, \
12642 (__v4di)(__m256i)INDEX, \
12643 (__v4df)(__m256d)V1, (int)SCALE)
12645 #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
12646 __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)0xFF, \
12647 (__v2di)(__m128i)INDEX, \
12648 (__v2df)(__m128d)V1, (int)SCALE)
12650 #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
12651 __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)MASK, \
12652 (__v2di)(__m128i)INDEX, \
12653 (__v2df)(__m128d)V1, (int)SCALE)
12655 #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
12656 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)0xFF, \
12657 (__v8si)(__m256i)INDEX, \
12658 (__v8si)(__m256i)V1, (int)SCALE)
12660 #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
12661 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)MASK, \
12662 (__v8si)(__m256i)INDEX, \
12663 (__v8si)(__m256i)V1, (int)SCALE)
12665 #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
12666 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)0xFF, \
12667 (__v4si)(__m128i)INDEX, \
12668 (__v4si)(__m128i)V1, (int)SCALE)
12670 #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
12671 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)MASK, \
12672 (__v4si)(__m128i)INDEX, \
12673 (__v4si)(__m128i)V1, (int)SCALE)
12675 #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
12676 __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)0xFF, \
12677 (__v4si)(__m128i)INDEX, \
12678 (__v4di)(__m256i)V1, (int)SCALE)
12680 #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
12681 __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)MASK, \
12682 (__v4si)(__m128i)INDEX, \
12683 (__v4di)(__m256i)V1, (int)SCALE)
12685 #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
12686 __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)0xFF, \
12687 (__v4si)(__m128i)INDEX, \
12688 (__v2di)(__m128i)V1, (int)SCALE)
12690 #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
12691 __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)MASK, \
12692 (__v4si)(__m128i)INDEX, \
12693 (__v2di)(__m128i)V1, (int)SCALE)
12695 #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
12696 __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)0xFF, \
12697 (__v4di)(__m256i)INDEX, \
12698 (__v4si)(__m128i)V1, (int)SCALE)
12700 #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
12701 __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)MASK, \
12702 (__v4di)(__m256i)INDEX, \
12703 (__v4si)(__m128i)V1, (int)SCALE)
12705 #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
12706 __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)0xFF, \
12707 (__v2di)(__m128i)INDEX, \
12708 (__v4si)(__m128i)V1, (int)SCALE)
12710 #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
12711 __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)MASK, \
12712 (__v2di)(__m128i)INDEX, \
12713 (__v4si)(__m128i)V1, (int)SCALE)
12715 #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
12716 __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)0xFF, \
12717 (__v4di)(__m256i)INDEX, \
12718 (__v4di)(__m256i)V1, (int)SCALE)
12720 #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
12721 __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)MASK, \
12722 (__v4di)(__m256i)INDEX, \
12723 (__v4di)(__m256i)V1, (int)SCALE)
12725 #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
12726 __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)0xFF, \
12727 (__v2di)(__m128i)INDEX, \
12728 (__v2di)(__m128i)V1, (int)SCALE)
12730 #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
12731 __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)MASK, \
12732 (__v2di)(__m128i)INDEX, \
12733 (__v2di)(__m128i)V1, (int)SCALE)
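/* The scatter macros store element i of V1 to ADDR + INDEX[i] * SCALE;
   the unmasked forms pass a constant 0xFF mask so every element is
   written, while the mask_ forms store only the elements whose mask bit
   is set.  */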
12735 #define _mm256_mask_shuffle_epi32(W, U, X, C) \
12736 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
12737 (__v8si)(__m256i)(W), \
12738 (__mmask8)(U)))
12740 #define _mm256_maskz_shuffle_epi32(U, X, C) \
12741 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
12742 (__v8si)(__m256i)_mm256_setzero_si256(), \
12743 (__mmask8)(U)))
12745 #define _mm_mask_shuffle_epi32(W, U, X, C) \
12746 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
12747 (__v4si)(__m128i)(W), \
12748 (__mmask8)(U)))
12750 #define _mm_maskz_shuffle_epi32(U, X, C) \
12751 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
12752 (__v4si)(__m128i)_mm_setzero_si128 (), \
12753 (__mmask8)(U)))
12755 #define _mm256_rol_epi64(A, B) \
12756 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
12757 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12758 (__mmask8)-1))
12760 #define _mm256_mask_rol_epi64(W, U, A, B) \
12761 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
12762 (__v4di)(__m256i)(W), \
12763 (__mmask8)(U)))
12765 #define _mm256_maskz_rol_epi64(U, A, B) \
12766 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
12767 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12768 (__mmask8)(U)))
12770 #define _mm_rol_epi64(A, B) \
12771 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
12772 (__v2di)(__m128i)_mm_setzero_di(), \
12773 (__mmask8)-1))
12775 #define _mm_mask_rol_epi64(W, U, A, B) \
12776 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
12777 (__v2di)(__m128i)(W), \
12778 (__mmask8)(U)))
12780 #define _mm_maskz_rol_epi64(U, A, B) \
12781 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
12782 (__v2di)(__m128i)_mm_setzero_di(), \
12783 (__mmask8)(U)))
12785 #define _mm256_ror_epi64(A, B) \
12786 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
12787 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12788 (__mmask8)-1))
12790 #define _mm256_mask_ror_epi64(W, U, A, B) \
12791 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
12792 (__v4di)(__m256i)(W), \
12793 (__mmask8)(U)))
12795 #define _mm256_maskz_ror_epi64(U, A, B) \
12796 ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
12797 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12798 (__mmask8)(U)))
12800 #define _mm_ror_epi64(A, B) \
12801 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
12802 (__v2di)(__m128i)_mm_setzero_di(), \
12803 (__mmask8)-1))
12805 #define _mm_mask_ror_epi64(W, U, A, B) \
12806 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
12807 (__v2di)(__m128i)(W), \
12808 (__mmask8)(U)))
12810 #define _mm_maskz_ror_epi64(U, A, B) \
12811 ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
12812 (__v2di)(__m128i)_mm_setzero_di(), \
12813 (__mmask8)(U)))
12815 #define _mm256_rol_epi32(A, B) \
12816 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
12817 (__v8si)(__m256i)_mm256_setzero_si256(),\
12818 (__mmask8)-1))
12820 #define _mm256_mask_rol_epi32(W, U, A, B) \
12821 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
12822 (__v8si)(__m256i)(W), \
12823 (__mmask8)(U)))
12825 #define _mm256_maskz_rol_epi32(U, A, B) \
12826 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
12827 (__v8si)(__m256i)_mm256_setzero_si256(),\
12828 (__mmask8)(U)))
12830 #define _mm_rol_epi32(A, B) \
12831 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
12832 (__v4si)(__m128i)_mm_setzero_si128 (), \
12833 (__mmask8)-1))
12835 #define _mm_mask_rol_epi32(W, U, A, B) \
12836 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
12837 (__v4si)(__m128i)(W), \
12838 (__mmask8)(U)))
12840 #define _mm_maskz_rol_epi32(U, A, B) \
12841 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
12842 (__v4si)(__m128i)_mm_setzero_si128 (), \
12843 (__mmask8)(U)))
12845 #define _mm256_ror_epi32(A, B) \
12846 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
12847 (__v8si)(__m256i)_mm256_setzero_si256(),\
12848 (__mmask8)-1))
12850 #define _mm256_mask_ror_epi32(W, U, A, B) \
12851 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
12852 (__v8si)(__m256i)(W), \
12853 (__mmask8)(U)))
12855 #define _mm256_maskz_ror_epi32(U, A, B) \
12856 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
12857 (__v8si)(__m256i)_mm256_setzero_si256(),\
12858 (__mmask8)(U)))
12860 #define _mm_ror_epi32(A, B) \
12861 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
12862 (__v4si)(__m128i)_mm_setzero_si128 (), \
12863 (__mmask8)-1))
12865 #define _mm_mask_ror_epi32(W, U, A, B) \
12866 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
12867 (__v4si)(__m128i)(W), \
12868 (__mmask8)(U)))
12870 #define _mm_maskz_ror_epi32(U, A, B) \
12871 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
12872 (__v4si)(__m128i)_mm_setzero_si128 (), \
12873 (__mmask8)(U)))
12875 #define _mm256_alignr_epi32(X, Y, C) \
12876 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
12877 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))
12879 #define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
12880 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
12881 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
12883 #define _mm256_maskz_alignr_epi32(U, X, Y, C) \
12884 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
12885 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
12886 (__mmask8)(U)))
12888 #define _mm256_alignr_epi64(X, Y, C) \
12889 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
12890 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))
12892 #define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
12893 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
12894 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
12896 #define _mm256_maskz_alignr_epi64(U, X, Y, C) \
12897 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
12898 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
12899 (__mmask8)(U)))
12901 #define _mm_alignr_epi32(X, Y, C) \
12902 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
12903 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))
12905 #define _mm_mask_alignr_epi32(W, U, X, Y, C) \
12906 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
12907 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12909 #define _mm_maskz_alignr_epi32(U, X, Y, C) \
12910 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
12911 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
12912 (__mmask8)(U)))
12914 #define _mm_alignr_epi64(X, Y, C) \
12915 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
12916 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
12918 #define _mm_mask_alignr_epi64(W, U, X, Y, C) \
12919 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
12920 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
12922 #define _mm_maskz_alignr_epi64(U, X, Y, C) \
12923 ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
12924 (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
12925 (__mmask8)(U)))
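/* The alignr macros concatenate the two sources and extract a window
   shifted right by C elements (32-bit elements for epi32, 64-bit for
   epi64), merged or zeroed under the mask in the mask_/maskz_ forms.  */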
12927 #define _mm_mask_cvtps_ph(W, U, A, I) \
12928 ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
12929 (__v8hi)(__m128i) (W), (__mmask8) (U)))
12931 #define _mm_maskz_cvtps_ph(U, A, I) \
12932 ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
12933 (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
12935 #define _mm256_mask_cvtps_ph(W, U, A, I) \
12936 ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
12937 (__v8hi)(__m128i) (W), (__mmask8) (U)))
12939 #define _mm256_maskz_cvtps_ph(U, A, I) \
12940 ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
12941 (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
12943 #define _mm256_mask_srai_epi32(W, U, A, B) \
12944 ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
12945 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
12947 #define _mm256_maskz_srai_epi32(U, A, B) \
12948 ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
12949 (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
12951 #define _mm_mask_srai_epi32(W, U, A, B) \
12952 ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
12953 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12955 #define _mm_maskz_srai_epi32(U, A, B) \
12956 ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
12957 (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
12959 #define _mm256_srai_epi64(A, B) \
12960 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
12961 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
12963 #define _mm256_mask_srai_epi64(W, U, A, B) \
12964 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
12965 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
12967 #define _mm256_maskz_srai_epi64(U, A, B) \
12968 ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
12969 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
12971 #define _mm_srai_epi64(A, B) \
12972 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
12973 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
12975 #define _mm_mask_srai_epi64(W, U, A, B) \
12976 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
12977 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
12979 #define _mm_maskz_srai_epi64(U, A, B) \
12980 ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
12981 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
12983 #define _mm256_mask_permutex_pd(W, U, A, B) \
12984 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
12985 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
12987 #define _mm256_maskz_permutex_pd(U, A, B) \
12988 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
12989 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
12991 #define _mm256_mask_permute_pd(W, U, X, C) \
12992 ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
12993 (__v4df)(__m256d)(W), \
12994 (__mmask8)(U)))
12996 #define _mm256_maskz_permute_pd(U, X, C) \
12997 ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
12998 (__v4df)(__m256d)_mm256_setzero_pd(), \
12999 (__mmask8)(U)))
13001 #define _mm256_mask_permute_ps(W, U, X, C) \
13002 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
13003 (__v8sf)(__m256)(W), (__mmask8)(U)))
13005 #define _mm256_maskz_permute_ps(U, X, C) \
13006 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
13007 (__v8sf)(__m256)_mm256_setzero_ps(), \
13008 (__mmask8)(U)))
13010 #define _mm_mask_permute_pd(W, U, X, C) \
13011 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
13012 (__v2df)(__m128d)(W), (__mmask8)(U)))
13014 #define _mm_maskz_permute_pd(U, X, C) \
13015 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
13016 (__v2df)(__m128d)_mm_setzero_pd(), \
13017 (__mmask8)(U)))
13019 #define _mm_mask_permute_ps(W, U, X, C) \
13020 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
13021 (__v4sf)(__m128)(W), (__mmask8)(U)))
13023 #define _mm_maskz_permute_ps(U, X, C) \
13024 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
13025 (__v4sf)(__m128)_mm_setzero_ps(), \
13026 (__mmask8)(U)))
13028 #define _mm256_mask_blend_pd(__U, __A, __W) \
13029 ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
13030 (__v4df) (__W), \
13031 (__mmask8) (__U)))
13033 #define _mm256_mask_blend_ps(__U, __A, __W) \
13034 ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
13035 (__v8sf) (__W), \
13036 (__mmask8) (__U)))
13038 #define _mm256_mask_blend_epi64(__U, __A, __W) \
13039 ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
13040 (__v4di) (__W), \
13041 (__mmask8) (__U)))
13043 #define _mm256_mask_blend_epi32(__U, __A, __W) \
13044 ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
13045 (__v8si) (__W), \
13046 (__mmask8) (__U)))
13048 #define _mm_mask_blend_pd(__U, __A, __W) \
13049 ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
13050 (__v2df) (__W), \
13051 (__mmask8) (__U)))
13053 #define _mm_mask_blend_ps(__U, __A, __W) \
13054 ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
13055 (__v4sf) (__W), \
13056 (__mmask8) (__U)))
13058 #define _mm_mask_blend_epi64(__U, __A, __W) \
13059 ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
13060 (__v2di) (__W), \
13061 (__mmask8) (__U)))
13063 #define _mm_mask_blend_epi32(__U, __A, __W) \
13064 ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
13065 (__v4si) (__W), \
13066 (__mmask8) (__U)))
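/* The blend macros select element i from __W when bit i of __U is set
   and from __A otherwise, i.e. a masked move with an explicit
   writemask.  */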
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
                                            (__v8si)(__m256i)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
                                           (__v4di)(__m256i)(Y), (int)(P), \
                                           (__mmask8)-1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
                                           (__v8si)(__m256i)(Y), (int)(P), \
                                           (__mmask8)-1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
                                            (__v4di)(__m256i)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
                                            (__v4df)(__m256d)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
                                            (__v8sf)(__m256)(Y), (int)(P), \
                                            (__mmask8)-1))
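
/* Illustrative usage sketch: the cmp macros take an immediate predicate
   and return one mask bit per element.  The predicate constants named
   here are an assumption about what <immintrin.h> already provides:
   _MM_CMPINT_* for the integer forms, _CMP_* for the FP forms.

     __m256i x = _mm256_set1_epi32 (2);
     __m256i y = _mm256_set1_epi32 (3);
     __mmask8 m1 = _mm256_cmp_epu32_mask (x, y, _MM_CMPINT_LT);
     __mmask8 m2 = _mm256_cmp_pd_mask (_mm256_setzero_pd (),
                                       _mm256_setzero_pd (), _CMP_EQ_OQ);
   Here M1 is 0xFF (all eight elements compare less) and M2 is 0x0F
   (a 256-bit vector holds only four doubles, so only the low four
   bits can be set).  */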
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
                                           (__v4di)(__m256i)(Y), (int)(P), \
                                           (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
                                           (__v8si)(__m256i)(Y), (int)(P), \
                                           (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
                                            (__v4di)(__m256i)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
                                            (__v8si)(__m256i)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
                                            (__v4df)(__m256d)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
                                            (__v8sf)(__m256)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
                                           (__v2di)(__m128i)(Y), (int)(P), \
                                           (__mmask8)-1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
                                           (__v4si)(__m128i)(Y), (int)(P), \
                                           (__mmask8)-1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
                                            (__v2di)(__m128i)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
                                            (__v4si)(__m128i)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
                                            (__v2df)(__m128d)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), (int)(P), \
                                            (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
                                           (__v2di)(__m128i)(Y), (int)(P), \
                                           (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
                                           (__v4si)(__m128i)(Y), (int)(P), \
                                           (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
                                            (__v2di)(__m128i)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
                                            (__v4si)(__m128i)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
                                            (__v2df)(__m128d)(Y), (int)(P), \
                                            (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), (int)(P), \
                                            (__mmask8)(M)))
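
/* Illustrative usage sketch: the mask_cmp forms AND the comparison
   result with the incoming mask M, so only elements whose M bit is set
   can appear in the result.  Under the same assumptions as above:

     __m128i x = _mm_set1_epi32 (7);
     __m128i y = _mm_set1_epi32 (7);
     __mmask8 k = _mm_mask_cmp_epi32_mask (0x3, x, y, _MM_CMPINT_EQ);
   All four elements compare equal, but K is 0x3 because only bits 0
   and 1 of the incoming mask are set.  */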
#endif

#define _mm256_mask_ceil_ps(A, B, C) _mm256_mask_roundscale_ps((A), (B), (C), _MM_FROUND_CEIL)
#define _mm256_mask_floor_ps(A, B, C) _mm256_mask_roundscale_ps((A), (B), (C), _MM_FROUND_FLOOR)
#define _mm256_mask_ceil_pd(A, B, C) _mm256_mask_roundscale_pd((A), (B), (C), _MM_FROUND_CEIL)
#define _mm256_mask_floor_pd(A, B, C) _mm256_mask_roundscale_pd((A), (B), (C), _MM_FROUND_FLOOR)
#define _mm256_maskz_ceil_ps(A, B) _mm256_maskz_roundscale_ps((A), (B), _MM_FROUND_CEIL)
#define _mm256_maskz_floor_ps(A, B) _mm256_maskz_roundscale_ps((A), (B), _MM_FROUND_FLOOR)
#define _mm256_maskz_ceil_pd(A, B) _mm256_maskz_roundscale_pd((A), (B), _MM_FROUND_CEIL)
#define _mm256_maskz_floor_pd(A, B) _mm256_maskz_roundscale_pd((A), (B), _MM_FROUND_FLOOR)
#define _mm_mask_ceil_ps(A, B, C) _mm_mask_roundscale_ps((A), (B), (C), _MM_FROUND_CEIL)
#define _mm_mask_floor_ps(A, B, C) _mm_mask_roundscale_ps((A), (B), (C), _MM_FROUND_FLOOR)
#define _mm_mask_ceil_pd(A, B, C) _mm_mask_roundscale_pd((A), (B), (C), _MM_FROUND_CEIL)
#define _mm_mask_floor_pd(A, B, C) _mm_mask_roundscale_pd((A), (B), (C), _MM_FROUND_FLOOR)
#define _mm_maskz_ceil_ps(A, B) _mm_maskz_roundscale_ps((A), (B), _MM_FROUND_CEIL)
#define _mm_maskz_floor_ps(A, B) _mm_maskz_roundscale_ps((A), (B), _MM_FROUND_FLOOR)
#define _mm_maskz_ceil_pd(A, B) _mm_maskz_roundscale_pd((A), (B), _MM_FROUND_CEIL)
#define _mm_maskz_floor_pd(A, B) _mm_maskz_roundscale_pd((A), (B), _MM_FROUND_FLOOR)
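
/* Illustrative usage sketch: the ceil/floor names above are thin
   aliases for the roundscale forms with a fixed rounding immediate.
   Assuming <immintrin.h> and -mavx512vl:

     __m256 v = _mm256_set1_ps (1.5f);
     __m256 up = _mm256_maskz_ceil_ps (0xFF, v);
     __m256 down = _mm256_maskz_floor_ps (0xFF, v);
   Every element of UP is 2.0f and every element of DOWN is 1.0f.  */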
#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
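
/* Illustrative note: _mm256_permutexvar_ps takes the index vector first
   and the data vector second; it simply forwards to the AVX2 intrinsic
   with the operands swapped.  Sketch (assumes <immintrin.h> and AVX2 or
   later):

     __m256 src = _mm256_setr_ps (0, 1, 2, 3, 4, 5, 6, 7);
     __m256i idx = _mm256_setr_epi32 (7, 6, 5, 4, 3, 2, 1, 0);
     __m256 rev = _mm256_permutexvar_ps (idx, src);
   REV holds the elements of SRC in reverse order.  */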
#ifdef __DISABLE_AVX512VL__
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */

#endif /* _AVX512VLINTRIN_H_INCLUDED */