1 /* Copyright (C) 2014
2 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #ifndef _IMMINTRIN_H_INCLUDED
26 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
27 #endif
29 #ifndef _AVX512VLINTRIN_H_INCLUDED
30 #define _AVX512VLINTRIN_H_INCLUDED
32 /* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
33 extern __inline __m128i
34 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
35 _mm_setzero_di (void)
37 return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
40 #ifndef __AVX512VL__
41 #pragma GCC push_options
42 #pragma GCC target("avx512vl")
43 #define __DISABLE_AVX512VL__
44 #endif /* __AVX512VL__ */
46 /* Internal data types for implementing the intrinsics. */
47 typedef unsigned int __mmask32;
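/* Naming convention used throughout this file: the _mask_ forms take a
   source vector __W and copy element I from it when bit I of the mask
   operand (__U or __M) is clear, while the _maskz_ forms zero that
   element instead; elements whose mask bit is set come from the
   operation's result.  Every vector in this header has at most eight
   elements, so an __mmask8 covers all of them.  For example
   (illustrative only):
     __m256d r = _mm256_mask_mov_pd (w, 0x5, a);
   copies elements 0 and 2 from a and elements 1 and 3 from w.  */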
49 extern __inline __m256d
50 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54 (__v4df) __W,
55 (__mmask8) __U);
58 extern __inline __m256d
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
62 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
63 (__v4df)
64 _mm256_setzero_pd (),
65 (__mmask8) __U);
68 extern __inline __m128d
69 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73 (__v2df) __W,
74 (__mmask8) __U);
77 extern __inline __m128d
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
81 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
82 (__v2df)
83 _mm_setzero_pd (),
84 (__mmask8) __U);
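/* Masked aligned loads and stores.  __P must be suitably aligned
   (32 bytes for the 256-bit forms, 16 bytes for the 128-bit forms).
   A masked store writes only the elements whose mask bit is set; a
   masked load fills the remaining elements from __W (_mask_) or with
   zero (_maskz_).  */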
87 extern __inline __m256d
88 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92 (__v4df) __W,
93 (__mmask8) __U);
96 extern __inline __m256d
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
100 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
101 (__v4df)
102 _mm256_setzero_pd (),
103 (__mmask8) __U);
106 extern __inline __m128d
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111 (__v2df) __W,
112 (__mmask8) __U);
115 extern __inline __m128d
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
119 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
120 (__v2df)
121 _mm_setzero_pd (),
122 (__mmask8) __U);
125 extern __inline void
126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
129 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
130 (__v4df) __A,
131 (__mmask8) __U);
134 extern __inline void
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
138 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
139 (__v2df) __A,
140 (__mmask8) __U);
143 extern __inline __m256
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf) __W,
149 (__mmask8) __U);
152 extern __inline __m256
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
156 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
157 (__v8sf)
158 _mm256_setzero_ps (),
159 (__mmask8) __U);
162 extern __inline __m128
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf) __W,
168 (__mmask8) __U);
171 extern __inline __m128
172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
175 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
176 (__v4sf)
177 _mm_setzero_ps (),
178 (__mmask8) __U);
181 extern __inline __m256
182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf) __W,
187 (__mmask8) __U);
190 extern __inline __m256
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
194 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
195 (__v8sf)
196 _mm256_setzero_ps (),
197 (__mmask8) __U);
200 extern __inline __m128
201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf) __W,
206 (__mmask8) __U);
209 extern __inline __m128
210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
211 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
213 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
214 (__v4sf)
215 _mm_setzero_ps (),
216 (__mmask8) __U);
219 extern __inline void
220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
223 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
224 (__v8sf) __A,
225 (__mmask8) __U);
228 extern __inline void
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
232 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
233 (__v4sf) __A,
234 (__mmask8) __U);
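/* The same masked move, aligned load and aligned store operations for
   packed 64-bit (epi64) and 32-bit (epi32) integers, using vmovdqa64
   and vmovdqa32.  */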
237 extern __inline __m256i
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di) __W,
243 (__mmask8) __U);
246 extern __inline __m256i
247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
248 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
250 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
251 (__v4di)
252 _mm256_setzero_si256 (),
253 (__mmask8) __U);
256 extern __inline __m128i
257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di) __W,
262 (__mmask8) __U);
265 extern __inline __m128i
266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
267 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
269 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
270 (__v2di)
271 _mm_setzero_di (),
272 (__mmask8) __U);
275 extern __inline __m256i
276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
277 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
279 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
280 (__v4di) __W,
281 (__mmask8)
282 __U);
285 extern __inline __m256i
286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
289 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
290 (__v4di)
291 _mm256_setzero_si256 (),
292 (__mmask8)
293 __U);
296 extern __inline __m128i
297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
300 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
301 (__v2di) __W,
302 (__mmask8)
303 __U);
306 extern __inline __m128i
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
310 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
311 (__v2di)
312 _mm_setzero_di (),
313 (__mmask8)
314 __U);
317 extern __inline void
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
321 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
322 (__v4di) __A,
323 (__mmask8) __U);
326 extern __inline void
327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
330 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
331 (__v2di) __A,
332 (__mmask8) __U);
335 extern __inline __m256i
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si) __W,
341 (__mmask8) __U);
344 extern __inline __m256i
345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
346 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
348 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
349 (__v8si)
350 _mm256_setzero_si256 (),
351 (__mmask8) __U);
354 extern __inline __m128i
355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si) __W,
360 (__mmask8) __U);
363 extern __inline __m128i
364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
365 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
367 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
368 (__v4si)
369 _mm_setzero_si128 (),
370 (__mmask8) __U);
373 extern __inline __m256i
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
377 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
378 (__v8si) __W,
379 (__mmask8)
380 __U);
383 extern __inline __m256i
384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
387 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
388 (__v8si)
389 _mm256_setzero_si256 (),
390 (__mmask8)
391 __U);
394 extern __inline __m128i
395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
396 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
398 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
399 (__v4si) __W,
400 (__mmask8)
401 __U);
404 extern __inline __m128i
405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
406 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
408 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
409 (__v4si)
410 _mm_setzero_si128 (),
411 (__mmask8)
412 __U);
415 extern __inline void
416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
419 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
420 (__v8si) __A,
421 (__mmask8) __U);
424 extern __inline void
425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
428 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
429 (__v4si) __A,
430 (__mmask8) __U);
433 extern __inline __m128i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm_setzero_hi (void)
437 return __extension__ (__m128i) (__v8hi)
438 {
439 0, 0, 0, 0, 0, 0, 0, 0};
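/* Masked arithmetic.  Element I of the result is __A[I] OP __B[I] when
   bit I of __U is set; otherwise it is copied from __W (_mask_) or set
   to zero (_maskz_).  */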
442 extern __inline __m128d
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
446 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
447 (__v2df) __B,
448 (__v2df) __W,
449 (__mmask8) __U);
452 extern __inline __m128d
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
456 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
457 (__v2df) __B,
458 (__v2df)
459 _mm_setzero_pd (),
460 (__mmask8) __U);
463 extern __inline __m256d
464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
466 __m256d __B)
468 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
469 (__v4df) __B,
470 (__v4df) __W,
471 (__mmask8) __U);
474 extern __inline __m256d
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
478 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
479 (__v4df) __B,
480 (__v4df)
481 _mm256_setzero_pd (),
482 (__mmask8) __U);
485 extern __inline __m128
486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
487 _mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
489 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
490 (__v4sf) __B,
491 (__v4sf) __W,
492 (__mmask8) __U);
495 extern __inline __m128
496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
497 _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
499 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
500 (__v4sf) __B,
501 (__v4sf)
502 _mm_setzero_ps (),
503 (__mmask8) __U);
506 extern __inline __m256
507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
508 _mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
510 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
511 (__v8sf) __B,
512 (__v8sf) __W,
513 (__mmask8) __U);
516 extern __inline __m256
517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518 _mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
520 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
521 (__v8sf) __B,
522 (__v8sf)
523 _mm256_setzero_ps (),
524 (__mmask8) __U);
527 extern __inline __m128d
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
529 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
531 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
532 (__v2df) __B,
533 (__v2df) __W,
534 (__mmask8) __U);
537 extern __inline __m128d
538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
541 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
542 (__v2df) __B,
543 (__v2df)
544 _mm_setzero_pd (),
545 (__mmask8) __U);
548 extern __inline __m256d
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
551 __m256d __B)
553 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
554 (__v4df) __B,
555 (__v4df) __W,
556 (__mmask8) __U);
559 extern __inline __m256d
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
563 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
564 (__v4df) __B,
565 (__v4df)
566 _mm256_setzero_pd (),
567 (__mmask8) __U);
570 extern __inline __m128
571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
572 _mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
574 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
575 (__v4sf) __B,
576 (__v4sf) __W,
577 (__mmask8) __U);
580 extern __inline __m128
581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
582 _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
584 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
585 (__v4sf) __B,
586 (__v4sf)
587 _mm_setzero_ps (),
588 (__mmask8) __U);
591 extern __inline __m256
592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593 _mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
595 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
596 (__v8sf) __B,
597 (__v8sf) __W,
598 (__mmask8) __U);
601 extern __inline __m256
602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603 _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
605 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
606 (__v8sf) __B,
607 (__v8sf)
608 _mm256_setzero_ps (),
609 (__mmask8) __U);
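/* Plain (unmasked) aligned stores of a whole vector of packed 64-bit
   integers.  */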
612 extern __inline void
613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
614 _mm256_store_epi64 (void *__P, __m256i __A)
616 *(__m256i *) __P = __A;
619 extern __inline void
620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
621 _mm_store_epi64 (void *__P, __m128i __A)
623 *(__m128i *) __P = __A;
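/* Masked unaligned loads and stores (vmovupd, vmovups, vmovdqu64,
   vmovdqu32).  Unlike the aligned forms above, __P may have any
   alignment.  */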
626 extern __inline __m256d
627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
628 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
630 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
631 (__v4df) __W,
632 (__mmask8) __U);
635 extern __inline __m256d
636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
637 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
639 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
640 (__v4df)
641 _mm256_setzero_pd (),
642 (__mmask8) __U);
645 extern __inline __m128d
646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
647 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
649 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
650 (__v2df) __W,
651 (__mmask8) __U);
654 extern __inline __m128d
655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
656 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
658 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
659 (__v2df)
660 _mm_setzero_pd (),
661 (__mmask8) __U);
664 extern __inline void
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
668 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
669 (__v4df) __A,
670 (__mmask8) __U);
673 extern __inline void
674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
677 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
678 (__v2df) __A,
679 (__mmask8) __U);
682 extern __inline __m256
683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
684 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
686 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
687 (__v8sf) __W,
688 (__mmask8) __U);
691 extern __inline __m256
692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
693 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
695 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
696 (__v8sf)
697 _mm256_setzero_ps (),
698 (__mmask8) __U);
701 extern __inline __m128
702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
703 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
705 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
706 (__v4sf) __W,
707 (__mmask8) __U);
710 extern __inline __m128
711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
712 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
714 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
715 (__v4sf)
716 _mm_setzero_ps (),
717 (__mmask8) __U);
720 extern __inline void
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
724 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
725 (__v8sf) __A,
726 (__mmask8) __U);
729 extern __inline void
730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
733 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
734 (__v4sf) __A,
735 (__mmask8) __U);
738 extern __inline __m256i
739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
740 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
742 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
743 (__v4di) __W,
744 (__mmask8) __U);
747 extern __inline __m256i
748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
749 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
752 (__v4di)
753 _mm256_setzero_si256 (),
754 (__mmask8) __U);
757 extern __inline __m128i
758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
759 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
761 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
762 (__v2di) __W,
763 (__mmask8) __U);
766 extern __inline __m128i
767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
768 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
770 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
771 (__v2di)
772 _mm_setzero_di (),
773 (__mmask8) __U);
776 extern __inline void
777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
780 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
781 (__v4di) __A,
782 (__mmask8) __U);
785 extern __inline void
786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
789 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
790 (__v2di) __A,
791 (__mmask8) __U);
794 extern __inline __m256i
795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
796 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
798 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
799 (__v8si) __W,
800 (__mmask8) __U);
803 extern __inline __m256i
804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
807 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
808 (__v8si)
809 _mm256_setzero_si256 (),
810 (__mmask8) __U);
813 extern __inline __m128i
814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
817 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
818 (__v4si) __W,
819 (__mmask8) __U);
822 extern __inline __m128i
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
826 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
827 (__v4si)
828 _mm_setzero_si128 (),
829 (__mmask8) __U);
832 extern __inline void
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
836 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
837 (__v8si) __A,
838 (__mmask8) __U);
841 extern __inline void
842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
845 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
846 (__v4si) __A,
847 (__mmask8) __U);
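/* Masked absolute value.  The 64-bit forms (vpabsq) are new with
   AVX-512; the 32-bit forms add write masking to the SSSE3/AVX2
   pabsd/vpabsd operation.  */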
850 extern __inline __m256i
851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
852 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
854 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
855 (__v8si) __W,
856 (__mmask8) __U);
859 extern __inline __m256i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
863 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
864 (__v8si)
865 _mm256_setzero_si256 (),
866 (__mmask8) __U);
869 extern __inline __m128i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
873 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
874 (__v4si) __W,
875 (__mmask8) __U);
878 extern __inline __m128i
879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
880 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
882 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
883 (__v4si)
884 _mm_setzero_si128 (),
885 (__mmask8) __U);
888 extern __inline __m256i
889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
890 _mm256_abs_epi64 (__m256i __A)
892 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
893 (__v4di)
894 _mm256_setzero_si256 (),
895 (__mmask8) -1);
898 extern __inline __m256i
899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
902 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
903 (__v4di) __W,
904 (__mmask8) __U);
907 extern __inline __m256i
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
911 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
912 (__v4di)
913 _mm256_setzero_si256 (),
914 (__mmask8) __U);
917 extern __inline __m128i
918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
919 _mm_abs_epi64 (__m128i __A)
921 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
922 (__v2di)
923 _mm_setzero_di (),
924 (__mmask8) -1);
927 extern __inline __m128i
928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
929 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
931 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
932 (__v2di) __W,
933 (__mmask8) __U);
936 extern __inline __m128i
937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
938 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
940 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
941 (__v2di)
942 _mm_setzero_di (),
943 (__mmask8) __U);
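/* Conversions from packed double to unsigned 32-bit integers
   (vcvtpd2udq), rounding according to the current rounding mode.  The
   128-bit source forms produce their two results in the low half of
   the __m128i result.  */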
946 extern __inline __m128i
947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
948 _mm256_cvtpd_epu32 (__m256d __A)
950 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
951 (__v4si)
952 _mm_setzero_si128 (),
953 (__mmask8) -1);
956 extern __inline __m128i
957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
958 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
960 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
961 (__v4si) __W,
962 (__mmask8) __U);
965 extern __inline __m128i
966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
969 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
970 (__v4si)
971 _mm_setzero_si128 (),
972 (__mmask8) __U);
975 extern __inline __m128i
976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
977 _mm_cvtpd_epu32 (__m128d __A)
979 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
980 (__v4si)
981 _mm_setzero_si128 (),
982 (__mmask8) -1);
985 extern __inline __m128i
986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
989 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
990 (__v4si) __W,
991 (__mmask8) __U);
994 extern __inline __m128i
995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
996 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
998 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
999 (__v4si)
1000 _mm_setzero_si128 (),
1001 (__mmask8) __U);
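/* The cvtt* conversions below truncate toward zero instead of using
   the current rounding mode.  */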
1004 extern __inline __m256i
1005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1006 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1008 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1009 (__v8si) __W,
1010 (__mmask8) __U);
1013 extern __inline __m256i
1014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1015 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1017 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1018 (__v8si)
1019 _mm256_setzero_si256 (),
1020 (__mmask8) __U);
1023 extern __inline __m128i
1024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1025 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1027 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1028 (__v4si) __W,
1029 (__mmask8) __U);
1032 extern __inline __m128i
1033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1034 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1036 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1037 (__v4si)
1038 _mm_setzero_si128 (),
1039 (__mmask8) __U);
1042 extern __inline __m256i
1043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1044 _mm256_cvttps_epu32 (__m256 __A)
1046 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1047 (__v8si)
1048 _mm256_setzero_si256 (),
1049 (__mmask8) -1);
1052 extern __inline __m256i
1053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1056 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1057 (__v8si) __W,
1058 (__mmask8) __U);
1061 extern __inline __m256i
1062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1063 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1065 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1066 (__v8si)
1067 _mm256_setzero_si256 (),
1068 (__mmask8) __U);
1071 extern __inline __m128i
1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm_cvttps_epu32 (__m128 __A)
1075 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1076 (__v4si)
1077 _mm_setzero_si128 (),
1078 (__mmask8) -1);
1081 extern __inline __m128i
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1085 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1086 (__v4si) __W,
1087 (__mmask8) __U);
1090 extern __inline __m128i
1091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1094 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1095 (__v4si)
1096 _mm_setzero_si128 (),
1097 (__mmask8) __U);
1100 extern __inline __m128i
1101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1102 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1104 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1105 (__v4si) __W,
1106 (__mmask8) __U);
1109 extern __inline __m128i
1110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1111 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1113 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1114 (__v4si)
1115 _mm_setzero_si128 (),
1116 (__mmask8) __U);
1119 extern __inline __m128i
1120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1121 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1123 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1124 (__v4si) __W,
1125 (__mmask8) __U);
1128 extern __inline __m128i
1129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1130 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1132 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1133 (__v4si)
1134 _mm_setzero_si128 (),
1135 (__mmask8) __U);
1138 extern __inline __m128i
1139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1140 _mm256_cvttpd_epu32 (__m256d __A)
1142 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1143 (__v4si)
1144 _mm_setzero_si128 (),
1145 (__mmask8) -1);
1148 extern __inline __m128i
1149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1152 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1153 (__v4si) __W,
1154 (__mmask8) __U);
1157 extern __inline __m128i
1158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1159 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1161 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1162 (__v4si)
1163 _mm_setzero_si128 (),
1164 (__mmask8) __U);
1167 extern __inline __m128i
1168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1169 _mm_cvttpd_epu32 (__m128d __A)
1171 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1172 (__v4si)
1173 _mm_setzero_si128 (),
1174 (__mmask8) -1);
1177 extern __inline __m128i
1178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1179 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1181 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1182 (__v4si) __W,
1183 (__mmask8) __U);
1186 extern __inline __m128i
1187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1188 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1190 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1191 (__v4si)
1192 _mm_setzero_si128 (),
1193 (__mmask8) __U);
1196 extern __inline __m128i
1197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1198 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1200 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1201 (__v4si) __W,
1202 (__mmask8) __U);
1205 extern __inline __m128i
1206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1209 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1210 (__v4si)
1211 _mm_setzero_si128 (),
1212 (__mmask8) __U);
1215 extern __inline __m128i
1216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1219 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1220 (__v4si) __W,
1221 (__mmask8) __U);
1224 extern __inline __m128i
1225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1226 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1228 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1229 (__v4si)
1230 _mm_setzero_si128 (),
1231 (__mmask8) __U);
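/* Widening conversions from packed 32-bit integers (signed epi32 and
   unsigned epu32) to double and single precision.  The unsigned
   variants (vcvtudq2pd, vcvtudq2ps) are new with AVX-512.  */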
1234 extern __inline __m256d
1235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1238 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1239 (__v4df) __W,
1240 (__mmask8) __U);
1243 extern __inline __m256d
1244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1245 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1247 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1248 (__v4df)
1249 _mm256_setzero_pd (),
1250 (__mmask8) __U);
1253 extern __inline __m128d
1254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1255 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1257 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1258 (__v2df) __W,
1259 (__mmask8) __U);
1262 extern __inline __m128d
1263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1264 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1266 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1267 (__v2df)
1268 _mm_setzero_pd (),
1269 (__mmask8) __U);
1272 extern __inline __m256d
1273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1274 _mm256_cvtepu32_pd (__m128i __A)
1276 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1277 (__v4df)
1278 _mm256_setzero_pd (),
1279 (__mmask8) -1);
1282 extern __inline __m256d
1283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1286 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1287 (__v4df) __W,
1288 (__mmask8) __U);
1291 extern __inline __m256d
1292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1293 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1295 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1296 (__v4df)
1297 _mm256_setzero_pd (),
1298 (__mmask8) __U);
1301 extern __inline __m128d
1302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1303 _mm_cvtepu32_pd (__m128i __A)
1305 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1306 (__v2df)
1307 _mm_setzero_pd (),
1308 (__mmask8) -1);
1311 extern __inline __m128d
1312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1313 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1315 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1316 (__v2df) __W,
1317 (__mmask8) __U);
1320 extern __inline __m128d
1321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1322 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1324 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1325 (__v2df)
1326 _mm_setzero_pd (),
1327 (__mmask8) __U);
1330 extern __inline __m256
1331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1334 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1335 (__v8sf) __W,
1336 (__mmask8) __U);
1339 extern __inline __m256
1340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1341 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
1343 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1344 (__v8sf)
1345 _mm256_setzero_ps (),
1346 (__mmask8) __U);
1349 extern __inline __m128
1350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1351 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1353 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1354 (__v4sf) __W,
1355 (__mmask8) __U);
1358 extern __inline __m128
1359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
1362 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1363 (__v4sf)
1364 _mm_setzero_ps (),
1365 (__mmask8) __U);
1368 extern __inline __m256
1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm256_cvtepu32_ps (__m256i __A)
1372 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1373 (__v8sf)
1374 _mm256_setzero_ps (),
1375 (__mmask8) -1);
1378 extern __inline __m256
1379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1380 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1382 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1383 (__v8sf) __W,
1384 (__mmask8) __U);
1387 extern __inline __m256
1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1391 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1392 (__v8sf)
1393 _mm256_setzero_ps (),
1394 (__mmask8) __U);
1397 extern __inline __m128
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm_cvtepu32_ps (__m128i __A)
1401 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1402 (__v4sf)
1403 _mm_setzero_ps (),
1404 (__mmask8) -1);
1407 extern __inline __m128
1408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1411 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1412 (__v4sf) __W,
1413 (__mmask8) __U);
1416 extern __inline __m128
1417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1418 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1420 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1421 (__v4sf)
1422 _mm_setzero_ps (),
1423 (__mmask8) __U);
1426 extern __inline __m256d
1427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1428 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1430 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1431 (__v4df) __W,
1432 (__mmask8) __U);
1435 extern __inline __m256d
1436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1437 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1439 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1440 (__v4df)
1441 _mm256_setzero_pd (),
1442 (__mmask8) __U);
1445 extern __inline __m128d
1446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1447 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1449 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1450 (__v2df) __W,
1451 (__mmask8) __U);
1454 extern __inline __m128d
1455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1456 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1458 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1459 (__v2df)
1460 _mm_setzero_pd (),
1461 (__mmask8) __U);
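/* Down-conversions (vpmov*).  Three families are provided: cvtepi*
   truncates, cvtsepi* saturates as signed, and cvtusepi* saturates as
   unsigned.  Each has a register form and a _storeu_ form that writes
   the narrowed elements straight to unaligned memory.  */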
1464 extern __inline __m128i
1465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466 _mm_cvtepi32_epi8 (__m128i __A)
1468 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1469 (__v16qi)_mm_undefined_si128(),
1470 (__mmask8) -1);
1473 extern __inline void
1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1477 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1480 extern __inline __m128i
1481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1482 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1484 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1485 (__v16qi) __O, __M);
1488 extern __inline __m128i
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1492 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1493 (__v16qi)
1494 _mm_setzero_si128 (),
1495 __M);
1498 extern __inline __m128i
1499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1500 _mm256_cvtepi32_epi8 (__m256i __A)
1502 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1503 (__v16qi)_mm_undefined_si128(),
1504 (__mmask8) -1);
1507 extern __inline __m128i
1508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1511 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1512 (__v16qi) __O, __M);
1515 extern __inline void
1516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1517 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1519 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1522 extern __inline __m128i
1523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1524 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1526 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1527 (__v16qi)
1528 _mm_setzero_si128 (),
1529 __M);
1532 extern __inline __m128i
1533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1534 _mm_cvtsepi32_epi8 (__m128i __A)
1536 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1537 (__v16qi)_mm_undefined_si128(),
1538 (__mmask8) -1);
1541 extern __inline void
1542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1545 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1548 extern __inline __m128i
1549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1550 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1552 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1553 (__v16qi) __O, __M);
1556 extern __inline __m128i
1557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1558 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1560 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1561 (__v16qi)
1562 _mm_setzero_si128 (),
1563 __M);
1566 extern __inline __m128i
1567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1568 _mm256_cvtsepi32_epi8 (__m256i __A)
1570 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1571 (__v16qi)_mm_undefined_si128(),
1572 (__mmask8) -1);
1575 extern __inline void
1576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1577 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1579 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1582 extern __inline __m128i
1583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1586 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1587 (__v16qi) __O, __M);
1590 extern __inline __m128i
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1594 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1595 (__v16qi)
1596 _mm_setzero_si128 (),
1597 __M);
1600 extern __inline __m128i
1601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1602 _mm_cvtusepi32_epi8 (__m128i __A)
1604 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1605 (__v16qi)_mm_undefined_si128(),
1606 (__mmask8) -1);
1609 extern __inline void
1610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1611 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1613 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1616 extern __inline __m128i
1617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1618 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1620 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1621 (__v16qi) __O,
1622 __M);
1625 extern __inline __m128i
1626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1627 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1629 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1630 (__v16qi)
1631 _mm_setzero_si128 (),
1632 __M);
1635 extern __inline __m128i
1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637 _mm256_cvtusepi32_epi8 (__m256i __A)
1639 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1640 (__v16qi)_mm_undefined_si128(),
1641 (__mmask8) -1);
1644 extern __inline void
1645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1646 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1648 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1651 extern __inline __m128i
1652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1653 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1655 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1656 (__v16qi) __O,
1657 __M);
1660 extern __inline __m128i
1661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1662 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1664 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1665 (__v16qi)
1666 _mm_setzero_si128 (),
1667 __M);
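/* The same truncating and saturating down-conversions from 32-bit to
   16-bit elements (vpmovdw, vpmovsdw, vpmovusdw).  */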
1670 extern __inline __m128i
1671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1672 _mm_cvtepi32_epi16 (__m128i __A)
1674 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1675 (__v8hi) _mm_setzero_si128 (),
1676 (__mmask8) -1);
1679 extern __inline void
1680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1681 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1683 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1686 extern __inline __m128i
1687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1690 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1691 (__v8hi) __O, __M);
1694 extern __inline __m128i
1695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1698 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1699 (__v8hi)
1700 _mm_setzero_si128 (),
1701 __M);
1704 extern __inline __m128i
1705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1706 _mm256_cvtepi32_epi16 (__m256i __A)
1708 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1709 (__v8hi)_mm_setzero_si128 (),
1710 (__mmask8) -1);
1713 extern __inline void
1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1717 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1720 extern __inline __m128i
1721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1722 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1724 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1725 (__v8hi) __O, __M);
1728 extern __inline __m128i
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1732 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1733 (__v8hi)
1734 _mm_setzero_si128 (),
1735 __M);
1738 extern __inline __m128i
1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 _mm_cvtsepi32_epi16 (__m128i __A)
1742 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1743 (__v8hi)_mm_setzero_si128 (),
1744 (__mmask8) -1);
1747 extern __inline void
1748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1751 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1754 extern __inline __m128i
1755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1758 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1759 (__v8hi)__O,
1760 __M);
1763 extern __inline __m128i
1764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1765 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1767 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1768 (__v8hi)
1769 _mm_setzero_si128 (),
1770 __M);
1773 extern __inline __m128i
1774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1775 _mm256_cvtsepi32_epi16 (__m256i __A)
1777 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1778 (__v8hi)_mm_undefined_si128(),
1779 (__mmask8) -1);
1782 extern __inline void
1783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1784 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1786 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1789 extern __inline __m128i
1790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1793 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1794 (__v8hi) __O, __M);
1797 extern __inline __m128i
1798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1799 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1801 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1802 (__v8hi)
1803 _mm_setzero_si128 (),
1804 __M);
1807 extern __inline __m128i
1808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1809 _mm_cvtusepi32_epi16 (__m128i __A)
1811 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1812 (__v8hi)_mm_undefined_si128(),
1813 (__mmask8) -1);
1816 extern __inline void
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1820 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1823 extern __inline __m128i
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1827 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1828 (__v8hi) __O, __M);
1831 extern __inline __m128i
1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1833 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1835 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1836 (__v8hi)
1837 _mm_setzero_si128 (),
1838 __M);
1841 extern __inline __m128i
1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1843 _mm256_cvtusepi32_epi16 (__m256i __A)
1845 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1846 (__v8hi)_mm_undefined_si128(),
1847 (__mmask8) -1);
1850 extern __inline void
1851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1852 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1854 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1857 extern __inline __m128i
1858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1859 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1861 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1862 (__v8hi) __O, __M);
1865 extern __inline __m128i
1866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1867 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1869 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1870 (__v8hi)
1871 _mm_setzero_si128 (),
1872 __M);
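/* Down-conversions from 64-bit elements, starting with the
   quadword-to-byte forms (vpmovqb, vpmovsqb, vpmovusqb).  */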
1875 extern __inline __m128i
1876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1877 _mm_cvtepi64_epi8 (__m128i __A)
1879 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1880 (__v16qi)_mm_undefined_si128(),
1881 (__mmask8) -1);
1884 extern __inline void
1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1888 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1891 extern __inline __m128i
1892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1895 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1896 (__v16qi) __O, __M);
1899 extern __inline __m128i
1900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1901 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1903 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1904 (__v16qi)
1905 _mm_setzero_si128 (),
1906 __M);
1909 extern __inline __m128i
1910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1911 _mm256_cvtepi64_epi8 (__m256i __A)
1913 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1914 (__v16qi)_mm_undefined_si128(),
1915 (__mmask8) -1);
1918 extern __inline void
1919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1920 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1922 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1925 extern __inline __m128i
1926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1929 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1930 (__v16qi) __O, __M);
1933 extern __inline __m128i
1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1935 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1937 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1938 (__v16qi)
1939 _mm_setzero_si128 (),
1940 __M);
1943 extern __inline __m128i
1944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1945 _mm_cvtsepi64_epi8 (__m128i __A)
1947 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1948 (__v16qi)_mm_undefined_si128(),
1949 (__mmask8) -1);
1952 extern __inline void
1953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1954 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1956 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1959 extern __inline __m128i
1960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1963 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1964 (__v16qi) __O, __M);
1967 extern __inline __m128i
1968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1969 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1971 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1972 (__v16qi)
1973 _mm_setzero_si128 (),
1974 __M);
1977 extern __inline __m128i
1978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979 _mm256_cvtsepi64_epi8 (__m256i __A)
1981 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1982 (__v16qi)_mm_undefined_si128(),
1983 (__mmask8) -1);
1986 extern __inline void
1987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1988 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1990 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1993 extern __inline __m128i
1994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1995 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1997 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1998 (__v16qi) __O, __M);
2001 extern __inline __m128i
2002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2003 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2005 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2006 (__v16qi)
2007 _mm_setzero_si128 (),
2008 __M);
2011 extern __inline __m128i
2012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2013 _mm_cvtusepi64_epi8 (__m128i __A)
2015 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2016 (__v16qi)_mm_undefined_si128(),
2017 (__mmask8) -1);
2020 extern __inline void
2021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2022 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2024 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2027 extern __inline __m128i
2028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2029 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2031 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2032 (__v16qi) __O,
2033 __M);
2036 extern __inline __m128i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2040 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2041 (__v16qi)
2042 _mm_setzero_si128 (),
2043 __M);
2046 extern __inline __m128i
2047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048 _mm256_cvtusepi64_epi8 (__m256i __A)
2050 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2051 (__v16qi)_mm_undefined_si128(),
2052 (__mmask8) -1);
2055 extern __inline void
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2059 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2062 extern __inline __m128i
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2066 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2067 (__v16qi) __O,
2068 __M);
2071 extern __inline __m128i
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2075 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2076 (__v16qi)
2077 _mm_setzero_si128 (),
2078 __M);
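/* Editorial note (sketch, not from the original source): the three families
   above differ only in how they narrow 64-bit elements to 8 bits.
   _mm_cvtepi64_epi8 (VPMOVQB) truncates, _mm_cvtsepi64_epi8 (VPMOVSQB)
   saturates as signed values to [-128, 127], and _mm_cvtusepi64_epi8
   (VPMOVUSQB) saturates as unsigned values to [0, 255].  For example:

     __m128i v = _mm_set_epi64x (0, 0x1234);

   truncation yields 0x34, signed saturation yields 0x7F, and unsigned
   saturation yields 0xFF for the low element.  */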
2081 extern __inline __m128i
2082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2083 _mm_cvtepi64_epi16 (__m128i __A)
2085 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2086 (__v8hi)_mm_undefined_si128(),
2087 (__mmask8) -1);
2090 extern __inline void
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2094 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2097 extern __inline __m128i
2098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2099 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2101 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2102 (__v8hi)__O,
2103 __M);
2106 extern __inline __m128i
2107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2108 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2110 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2111 (__v8hi)
2112 _mm_setzero_si128 (),
2113 __M);
2116 extern __inline __m128i
2117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2118 _mm256_cvtepi64_epi16 (__m256i __A)
2120 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2121 (__v8hi)_mm_undefined_si128(),
2122 (__mmask8) -1);
2125 extern __inline void
2126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2127 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2129 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2132 extern __inline __m128i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2136 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2137 (__v8hi) __O, __M);
2140 extern __inline __m128i
2141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2142 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2144 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2145 (__v8hi)
2146 _mm_setzero_si128 (),
2147 __M);
2150 extern __inline __m128i
2151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2152 _mm_cvtsepi64_epi16 (__m128i __A)
2154 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2155 (__v8hi)_mm_undefined_si128(),
2156 (__mmask8) -1);
2159 extern __inline void
2160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2161 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2163 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2166 extern __inline __m128i
2167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2168 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2170 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2171 (__v8hi) __O, __M);
2174 extern __inline __m128i
2175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2176 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2178 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2179 (__v8hi)
2180 _mm_setzero_si128 (),
2181 __M);
2184 extern __inline __m128i
2185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2186 _mm256_cvtsepi64_epi16 (__m256i __A)
2188 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2189 (__v8hi)_mm_undefined_si128(),
2190 (__mmask8) -1);
2193 extern __inline void
2194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2195 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2197 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2200 extern __inline __m128i
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2204 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2205 (__v8hi) __O, __M);
2208 extern __inline __m128i
2209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2210 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2212 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2213 (__v8hi)
2214 _mm_setzero_si128 (),
2215 __M);
2218 extern __inline __m128i
2219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2220 _mm_cvtusepi64_epi16 (__m128i __A)
2222 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2223 (__v8hi)_mm_undefined_si128(),
2224 (__mmask8) -1);
2227 extern __inline void
2228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2229 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2231 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2234 extern __inline __m128i
2235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2236 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2238 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2239 (__v8hi) __O, __M);
2242 extern __inline __m128i
2243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2244 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2246 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2247 (__v8hi)
2248 _mm_setzero_si128 (),
2249 __M);
2252 extern __inline __m128i
2253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2254 _mm256_cvtusepi64_epi16 (__m256i __A)
2256 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2257 (__v8hi)_mm_undefined_si128(),
2258 (__mmask8) -1);
2261 extern __inline void
2262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2263 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2265 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2268 extern __inline __m128i
2269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2272 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2273 (__v8hi) __O, __M);
2276 extern __inline __m128i
2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2278 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2280 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2281 (__v8hi)
2282 _mm_setzero_si128 (),
2283 __M);
2286 extern __inline __m128i
2287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2288 _mm_cvtepi64_epi32 (__m128i __A)
2290 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2291 (__v4si)_mm_undefined_si128(),
2292 (__mmask8) -1);
2295 extern __inline void
2296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2299 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2302 extern __inline __m128i
2303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2306 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2307 (__v4si) __O, __M);
2310 extern __inline __m128i
2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2314 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2315 (__v4si)
2316 _mm_setzero_si128 (),
2317 __M);
2320 extern __inline __m128i
2321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2322 _mm256_cvtepi64_epi32 (__m256i __A)
2324 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2325 (__v4si)_mm_undefined_si128(),
2326 (__mmask8) -1);
2329 extern __inline void
2330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2331 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2333 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2336 extern __inline __m128i
2337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2338 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2340 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2341 (__v4si) __O, __M);
2344 extern __inline __m128i
2345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2348 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2349 (__v4si)
2350 _mm_setzero_si128 (),
2351 __M);
2354 extern __inline __m128i
2355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356 _mm_cvtsepi64_epi32 (__m128i __A)
2358 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2359 (__v4si)_mm_undefined_si128(),
2360 (__mmask8) -1);
2363 extern __inline void
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2367 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2370 extern __inline __m128i
2371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2374 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2375 (__v4si) __O, __M);
2378 extern __inline __m128i
2379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2380 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2382 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2383 (__v4si)
2384 _mm_setzero_si128 (),
2385 __M);
2388 extern __inline __m128i
2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390 _mm256_cvtsepi64_epi32 (__m256i __A)
2392 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2393 (__v4si)_mm_undefined_si128(),
2394 (__mmask8) -1);
2397 extern __inline void
2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2401 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2404 extern __inline __m128i
2405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2406 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2408 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2409 (__v4si)__O,
2410 __M);
2413 extern __inline __m128i
2414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2415 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2417 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2418 (__v4si)
2419 _mm_setzero_si128 (),
2420 __M);
2423 extern __inline __m128i
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 _mm_cvtusepi64_epi32 (__m128i __A)
2427 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2428 (__v4si)_mm_undefined_si128(),
2429 (__mmask8) -1);
2432 extern __inline void
2433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2434 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2436 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2439 extern __inline __m128i
2440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2441 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2443 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2444 (__v4si) __O, __M);
2447 extern __inline __m128i
2448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2449 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2451 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2452 (__v4si)
2453 _mm_setzero_si128 (),
2454 __M);
2457 extern __inline __m128i
2458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2459 _mm256_cvtusepi64_epi32 (__m256i __A)
2461 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2462 (__v4si)_mm_undefined_si128(),
2463 (__mmask8) -1);
2466 extern __inline void
2467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2468 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2470 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2473 extern __inline __m128i
2474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2475 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2477 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2478 (__v4si) __O, __M);
2481 extern __inline __m128i
2482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2485 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2486 (__v4si)
2487 _mm_setzero_si128 (),
2488 __M);
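/* Editorial usage sketch (assumes -mavx512vl): the *_storeu_* variants of
   these down-conversions write the narrowed elements straight to unaligned
   memory, storing only the lanes whose mask bit is set and leaving the other
   destination bytes untouched:

     int dst[4] = { -1, -1, -1, -1 };
     __m256i v = _mm256_set_epi64x (40, 30, 20, 10);
     _mm256_mask_cvtepi64_storeu_epi32 (dst, 0x5, v);

   dst is now { 10, -1, 30, -1 }.  */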
2491 extern __inline __m256
2492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2493 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2495 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2496 (__v8sf) __O,
2497 __M);
2500 extern __inline __m256
2501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2502 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2504 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2505 (__v8sf)
2506 _mm256_setzero_ps (),
2507 __M);
2510 extern __inline __m128
2511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2512 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2514 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2515 (__v4sf) __O,
2516 __M);
2519 extern __inline __m128
2520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2521 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2523 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2524 (__v4sf)
2525 _mm_setzero_ps (),
2526 __M);
2529 extern __inline __m256d
2530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2531 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2533 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2534 (__v4df) __O,
2535 __M);
2538 extern __inline __m256d
2539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2540 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2542 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2543 (__v4df)
2544 _mm256_setzero_pd (),
2545 __M);
2548 extern __inline __m256i
2549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2552 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2553 (__v8si) __O,
2554 __M);
2557 extern __inline __m256i
2558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2559 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2561 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2562 (__v8si)
2563 _mm256_setzero_si256 (),
2564 __M);
2567 extern __inline __m256i
2568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2569 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2571 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2572 __M);
2575 extern __inline __m256i
2576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2577 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2579 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2580 (__v8si)
2581 _mm256_setzero_si256 (),
2582 __M);
2585 extern __inline __m128i
2586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2587 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2589 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2590 (__v4si) __O,
2591 __M);
2594 extern __inline __m128i
2595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2596 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2598 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2599 (__v4si)
2600 _mm_setzero_si128 (),
2601 __M);
2604 extern __inline __m128i
2605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2606 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2608 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2609 __M);
2612 extern __inline __m128i
2613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2614 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2616 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2617 (__v4si)
2618 _mm_setzero_si128 (),
2619 __M);
2622 extern __inline __m256i
2623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2624 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2626 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2627 (__v4di) __O,
2628 __M);
2631 extern __inline __m256i
2632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2635 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2636 (__v4di)
2637 _mm256_setzero_si256 (),
2638 __M);
2641 extern __inline __m256i
2642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2643 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2645 #ifdef __x86_64__
2646 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2647 __M);
2648 #else
2649 return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
2650 __M);
2651 #endif
2654 extern __inline __m256i
2655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2656 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2659 #ifdef __x86_64__
2659 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2660 (__v4di)
2661 _mm256_setzero_si256 (),
2662 __M);
2663 #else
2664 return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
2665 (__v4di)
2666 _mm256_setzero_si256 (),
2667 __M);
2668 #endif
2671 extern __inline __m128i
2672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2675 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2676 (__v2di) __O,
2677 __M);
2680 extern __inline __m128i
2681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2682 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2684 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2685 (__v2di)
2686 _mm_setzero_si128 (),
2687 __M);
2690 extern __inline __m128i
2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2694 #ifdef __x86_64__
2695 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2696 __M);
2697 #else
2698 return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
2699 __M);
2700 #endif
2703 extern __inline __m128i
2704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2705 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2707 #ifdef __x86_64__
2708 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2709 (__v2di)
2710 _mm_setzero_si128 (),
2711 __M);
2712 #else
2713 return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
2714 (__v2di)
2715 _mm_setzero_si128 (),
2716 __M);
2717 #endif
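/* Editorial usage sketch (assumes -mavx512vl): the mask_set1/maskz_set1 and
   broadcast forms above replicate one scalar, or the low element of a
   vector, into every selected lane; unselected lanes come from the
   pass-through operand (mask form) or are zeroed (maskz form):

     __m256i old = _mm256_set1_epi32 (7);
     __m256i r = _mm256_mask_set1_epi32 (old, 0xF0, 42);

   The low four lanes of r stay 7; the high four lanes become 42.  */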
2720 extern __inline __m256
2721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2722 _mm256_broadcast_f32x4 (__m128 __A)
2724 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2725 (__v8sf)_mm256_undefined_ps (),
2726 (__mmask8) -1);
2730 extern __inline __m256
2731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2732 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2734 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2735 (__v8sf) __O,
2736 __M);
2739 extern __inline __m256
2740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2741 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2743 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2744 (__v8sf)
2745 _mm256_setzero_ps (),
2746 __M);
2749 extern __inline __m256i
2750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2751 _mm256_broadcast_i32x4 (__m128i __A)
2753 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2754 __A,
2755 (__v8si)_mm256_undefined_si256 (),
2756 (__mmask8) -1);
2760 extern __inline __m256i
2761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2762 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2764 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2765 __A,
2766 (__v8si)
2767 __O, __M);
2770 extern __inline __m256i
2771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2772 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2774 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2775 __A,
2776 (__v8si)
2777 _mm256_setzero_si256 (),
2778 __M);
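/* Editorial usage sketch (assumes -mavx512vl): _mm256_broadcast_f32x4 and
   _mm256_broadcast_i32x4 duplicate a whole 128-bit source into both halves
   of a 256-bit vector, with optional merge or zero masking applied per
   32-bit element:

     __m128 q = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
     __m256 r = _mm256_broadcast_f32x4 (q);

   r holds 1,2,3,4,1,2,3,4 (low element first).  */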
2781 extern __inline __m256i
2782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2783 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2785 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2786 (__v8si) __W,
2787 (__mmask8) __U);
2790 extern __inline __m256i
2791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2792 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2794 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2795 (__v8si)
2796 _mm256_setzero_si256 (),
2797 (__mmask8) __U);
2800 extern __inline __m128i
2801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2802 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2804 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2805 (__v4si) __W,
2806 (__mmask8) __U);
2809 extern __inline __m128i
2810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2811 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2813 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2814 (__v4si)
2815 _mm_setzero_si128 (),
2816 (__mmask8) __U);
2819 extern __inline __m256i
2820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2821 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2823 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2824 (__v4di) __W,
2825 (__mmask8) __U);
2828 extern __inline __m256i
2829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2830 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2832 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2833 (__v4di)
2834 _mm256_setzero_si256 (),
2835 (__mmask8) __U);
2838 extern __inline __m128i
2839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2840 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2842 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2843 (__v2di) __W,
2844 (__mmask8) __U);
2847 extern __inline __m128i
2848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2849 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2851 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2852 (__v2di)
2853 _mm_setzero_si128 (),
2854 (__mmask8) __U);
2857 extern __inline __m256i
2858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2859 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2861 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2862 (__v8si) __W,
2863 (__mmask8) __U);
2866 extern __inline __m256i
2867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2868 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2870 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2871 (__v8si)
2872 _mm256_setzero_si256 (),
2873 (__mmask8) __U);
2876 extern __inline __m128i
2877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2878 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2880 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2881 (__v4si) __W,
2882 (__mmask8) __U);
2885 extern __inline __m128i
2886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2887 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2889 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2890 (__v4si)
2891 _mm_setzero_si128 (),
2892 (__mmask8) __U);
2895 extern __inline __m256i
2896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2897 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2899 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2900 (__v4di) __W,
2901 (__mmask8) __U);
2904 extern __inline __m256i
2905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2908 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2909 (__v4di)
2910 _mm256_setzero_si256 (),
2911 (__mmask8) __U);
2914 extern __inline __m128i
2915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2916 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2918 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2919 (__v2di) __W,
2920 (__mmask8) __U);
2923 extern __inline __m128i
2924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2925 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2927 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2928 (__v2di)
2929 _mm_setzero_si128 (),
2930 (__mmask8) __U);
2933 extern __inline __m256i
2934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2935 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2937 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2938 (__v4di) __W,
2939 (__mmask8) __U);
2942 extern __inline __m256i
2943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2944 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2946 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2947 (__v4di)
2948 _mm256_setzero_si256 (),
2949 (__mmask8) __U);
2952 extern __inline __m128i
2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2954 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2956 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2957 (__v2di) __W,
2958 (__mmask8) __U);
2961 extern __inline __m128i
2962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2963 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2965 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2966 (__v2di)
2967 _mm_setzero_si128 (),
2968 (__mmask8) __U);
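/* Editorial usage sketch (assumes -mavx512vl): these are the masked forms of
   the familiar VPMOVSX sign extensions; each narrow source element is
   sign-extended to the wider destination type, then merge or zero masking is
   applied per result lane:

     __m128i b = _mm_set_epi8 (0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 4, -3, 2, -1);
     __m128i r = _mm_maskz_cvtepi8_epi32 (0x3, b);

   r holds the 32-bit values -1, 2, 0, 0.  */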
2971 extern __inline __m256i
2972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2973 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2975 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2976 (__v8si) __W,
2977 (__mmask8) __U);
2980 extern __inline __m256i
2981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2982 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2984 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2985 (__v8si)
2986 _mm256_setzero_si256 (),
2987 (__mmask8) __U);
2990 extern __inline __m128i
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2994 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2995 (__v4si) __W,
2996 (__mmask8) __U);
2999 extern __inline __m128i
3000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3001 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
3003 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3004 (__v4si)
3005 _mm_setzero_si128 (),
3006 (__mmask8) __U);
3009 extern __inline __m256i
3010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3011 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3013 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3014 (__v4di) __W,
3015 (__mmask8) __U);
3018 extern __inline __m256i
3019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3020 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3022 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3023 (__v4di)
3024 _mm256_setzero_si256 (),
3025 (__mmask8) __U);
3028 extern __inline __m128i
3029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3030 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3032 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3033 (__v2di) __W,
3034 (__mmask8) __U);
3037 extern __inline __m128i
3038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3039 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3041 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3042 (__v2di)
3043 _mm_setzero_si128 (),
3044 (__mmask8) __U);
3047 extern __inline __m256i
3048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3051 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3052 (__v8si) __W,
3053 (__mmask8) __U);
3056 extern __inline __m256i
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3060 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3061 (__v8si)
3062 _mm256_setzero_si256 (),
3063 (__mmask8) __U);
3066 extern __inline __m128i
3067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3068 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3070 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3071 (__v4si) __W,
3072 (__mmask8) __U);
3075 extern __inline __m128i
3076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3077 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3079 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3080 (__v4si)
3081 _mm_setzero_si128 (),
3082 (__mmask8) __U);
3085 extern __inline __m256i
3086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3087 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3089 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3090 (__v4di) __W,
3091 (__mmask8) __U);
3094 extern __inline __m256i
3095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3096 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3098 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3099 (__v4di)
3100 _mm256_setzero_si256 (),
3101 (__mmask8) __U);
3104 extern __inline __m128i
3105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3106 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3108 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3109 (__v2di) __W,
3110 (__mmask8) __U);
3113 extern __inline __m128i
3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3115 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3117 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3118 (__v2di)
3119 _mm_setzero_si128 (),
3120 (__mmask8) __U);
3123 extern __inline __m256i
3124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3125 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3127 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3128 (__v4di) __W,
3129 (__mmask8) __U);
3132 extern __inline __m256i
3133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3134 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3136 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3137 (__v4di)
3138 _mm256_setzero_si256 (),
3139 (__mmask8) __U);
3142 extern __inline __m128i
3143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3144 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3146 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3147 (__v2di) __W,
3148 (__mmask8) __U);
3151 extern __inline __m128i
3152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3155 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3156 (__v2di)
3157 _mm_setzero_si128 (),
3158 (__mmask8) __U);
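/* Editorial note (sketch, not from the original source): the VPMOVZX group
   above mirrors the VPMOVSX group but zero-extends instead of
   sign-extending, so a source byte of 0xFF becomes 255 rather than -1 in the
   widened lanes.  E.g. _mm_maskz_cvtepu8_epi32 (0x1, v) widens only the
   lowest byte of v and zeroes the other three 32-bit lanes.  */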
3161 extern __inline __m256d
3162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3163 _mm256_rcp14_pd (__m256d __A)
3165 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3166 (__v4df)
3167 _mm256_setzero_pd (),
3168 (__mmask8) -1);
3171 extern __inline __m256d
3172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3173 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3175 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3176 (__v4df) __W,
3177 (__mmask8) __U);
3180 extern __inline __m256d
3181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3182 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3184 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3185 (__v4df)
3186 _mm256_setzero_pd (),
3187 (__mmask8) __U);
3190 extern __inline __m128d
3191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3192 _mm_rcp14_pd (__m128d __A)
3194 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3195 (__v2df)
3196 _mm_setzero_pd (),
3197 (__mmask8) -1);
3200 extern __inline __m128d
3201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3202 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3204 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3205 (__v2df) __W,
3206 (__mmask8) __U);
3209 extern __inline __m128d
3210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3211 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3213 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3214 (__v2df)
3215 _mm_setzero_pd (),
3216 (__mmask8) __U);
3219 extern __inline __m256
3220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3221 _mm256_rcp14_ps (__m256 __A)
3223 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3224 (__v8sf)
3225 _mm256_setzero_ps (),
3226 (__mmask8) -1);
3229 extern __inline __m256
3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3233 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3234 (__v8sf) __W,
3235 (__mmask8) __U);
3238 extern __inline __m256
3239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3240 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3242 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3243 (__v8sf)
3244 _mm256_setzero_ps (),
3245 (__mmask8) __U);
3248 extern __inline __m128
3249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250 _mm_rcp14_ps (__m128 __A)
3252 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3253 (__v4sf)
3254 _mm_setzero_ps (),
3255 (__mmask8) -1);
3258 extern __inline __m128
3259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3260 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3262 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3263 (__v4sf) __W,
3264 (__mmask8) __U);
3267 extern __inline __m128
3268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3269 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3271 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3272 (__v4sf)
3273 _mm_setzero_ps (),
3274 (__mmask8) __U);
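/* Editorial note (sketch): VRCP14PS/VRCP14PD return an approximate
   reciprocal with relative error less than 2^-14, so they are best used as a
   seed for a Newton-Raphson refinement rather than as an exact division
   replacement.  A hypothetical refinement of 1/a:

     __m256 r0 = _mm256_rcp14_ps (a);
     __m256 r1 = _mm256_mul_ps (r0, _mm256_sub_ps (_mm256_set1_ps (2.0f),
                                                   _mm256_mul_ps (a, r0)));

   r1 is accurate to roughly 2^-28 relative error.  */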
3277 extern __inline __m256d
3278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3279 _mm256_rsqrt14_pd (__m256d __A)
3281 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3282 (__v4df)
3283 _mm256_setzero_pd (),
3284 (__mmask8) -1);
3287 extern __inline __m256d
3288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3289 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3291 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3292 (__v4df) __W,
3293 (__mmask8) __U);
3296 extern __inline __m256d
3297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3298 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3300 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3301 (__v4df)
3302 _mm256_setzero_pd (),
3303 (__mmask8) __U);
3306 extern __inline __m128d
3307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3308 _mm_rsqrt14_pd (__m128d __A)
3310 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3311 (__v2df)
3312 _mm_setzero_pd (),
3313 (__mmask8) -1);
3316 extern __inline __m128d
3317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3318 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3320 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3321 (__v2df) __W,
3322 (__mmask8) __U);
3325 extern __inline __m128d
3326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3327 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3329 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3330 (__v2df)
3331 _mm_setzero_pd (),
3332 (__mmask8) __U);
3335 extern __inline __m256
3336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3337 _mm256_rsqrt14_ps (__m256 __A)
3339 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3340 (__v8sf)
3341 _mm256_setzero_ps (),
3342 (__mmask8) -1);
3345 extern __inline __m256
3346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3347 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3349 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3350 (__v8sf) __W,
3351 (__mmask8) __U);
3354 extern __inline __m256
3355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3356 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3358 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3359 (__v8sf)
3360 _mm256_setzero_ps (),
3361 (__mmask8) __U);
3364 extern __inline __m128
3365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366 _mm_rsqrt14_ps (__m128 __A)
3368 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3369 (__v4sf)
3370 _mm_setzero_ps (),
3371 (__mmask8) -1);
3374 extern __inline __m128
3375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3376 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3378 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3379 (__v4sf) __W,
3380 (__mmask8) __U);
3383 extern __inline __m128
3384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3385 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3387 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3388 (__v4sf)
3389 _mm_setzero_ps (),
3390 (__mmask8) __U);
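/* Editorial note (sketch): VRSQRT14PS/VRSQRT14PD likewise give an
   approximate 1/sqrt(x) with relative error below 2^-14.  One Newton-Raphson
   step, r1 = r0 * (1.5 - 0.5 * x * r0 * r0), roughly doubles the number of
   correct bits when more precision is required.  */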
3393 extern __inline __m256d
3394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3397 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3398 (__v4df) __W,
3399 (__mmask8) __U);
3402 extern __inline __m256d
3403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3404 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3406 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3407 (__v4df)
3408 _mm256_setzero_pd (),
3409 (__mmask8) __U);
3412 extern __inline __m128d
3413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3414 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3416 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3417 (__v2df) __W,
3418 (__mmask8) __U);
3421 extern __inline __m128d
3422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3423 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3425 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3426 (__v2df)
3427 _mm_setzero_pd (),
3428 (__mmask8) __U);
3431 extern __inline __m256
3432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3433 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3435 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3436 (__v8sf) __W,
3437 (__mmask8) __U);
3440 extern __inline __m256
3441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3442 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3444 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3445 (__v8sf)
3446 _mm256_setzero_ps (),
3447 (__mmask8) __U);
3450 extern __inline __m128
3451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3452 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3454 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3455 (__v4sf) __W,
3456 (__mmask8) __U);
3459 extern __inline __m128
3460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3461 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3463 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3464 (__v4sf)
3465 _mm_setzero_ps (),
3466 (__mmask8) __U);
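/* Editorial usage sketch (assumes -mavx512vl): unlike rcp14/rsqrt14, the
   sqrt forms above are the correctly rounded VSQRTPS/VSQRTPD; the mask only
   selects which result lanes are written:

     __m256d x = _mm256_set1_pd (9.0);
     __m256d r = _mm256_maskz_sqrt_pd (0x3, x);

   r is 3.0, 3.0, 0.0, 0.0.  */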
3469 extern __inline __m256i
3470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3471 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3472 __m256i __B)
3474 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3475 (__v8si) __B,
3476 (__v8si) __W,
3477 (__mmask8) __U);
3480 extern __inline __m256i
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3484 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3485 (__v8si) __B,
3486 (__v8si)
3487 _mm256_setzero_si256 (),
3488 (__mmask8) __U);
3491 extern __inline __m256i
3492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3493 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3494 __m256i __B)
3496 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3497 (__v4di) __B,
3498 (__v4di) __W,
3499 (__mmask8) __U);
3502 extern __inline __m256i
3503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3504 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3506 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3507 (__v4di) __B,
3508 (__v4di)
3509 _mm256_setzero_si256 (),
3510 (__mmask8) __U);
3513 extern __inline __m256i
3514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3515 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3516 __m256i __B)
3518 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3519 (__v8si) __B,
3520 (__v8si) __W,
3521 (__mmask8) __U);
3524 extern __inline __m256i
3525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3526 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3528 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3529 (__v8si) __B,
3530 (__v8si)
3531 _mm256_setzero_si256 (),
3532 (__mmask8) __U);
3535 extern __inline __m256i
3536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3537 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3538 __m256i __B)
3540 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3541 (__v4di) __B,
3542 (__v4di) __W,
3543 (__mmask8) __U);
3546 extern __inline __m256i
3547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3550 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3551 (__v4di) __B,
3552 (__v4di)
3553 _mm256_setzero_si256 (),
3554 (__mmask8) __U);
3557 extern __inline __m128i
3558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3559 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3560 __m128i __B)
3562 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3563 (__v4si) __B,
3564 (__v4si) __W,
3565 (__mmask8) __U);
3568 extern __inline __m128i
3569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3570 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3572 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3573 (__v4si) __B,
3574 (__v4si)
3575 _mm_setzero_si128 (),
3576 (__mmask8) __U);
3579 extern __inline __m128i
3580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3581 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3582 __m128i __B)
3584 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3585 (__v2di) __B,
3586 (__v2di) __W,
3587 (__mmask8) __U);
3590 extern __inline __m128i
3591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3592 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3594 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3595 (__v2di) __B,
3596 (__v2di)
3597 _mm_setzero_si128 (),
3598 (__mmask8) __U);
3601 extern __inline __m128i
3602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3604 __m128i __B)
3606 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3607 (__v4si) __B,
3608 (__v4si) __W,
3609 (__mmask8) __U);
3612 extern __inline __m128i
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3616 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3617 (__v4si) __B,
3618 (__v4si)
3619 _mm_setzero_si128 (),
3620 (__mmask8) __U);
3623 extern __inline __m128i
3624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3625 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3626 __m128i __B)
3628 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3629 (__v2di) __B,
3630 (__v2di) __W,
3631 (__mmask8) __U);
3634 extern __inline __m128i
3635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3636 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3638 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3639 (__v2di) __B,
3640 (__v2di)
3641 _mm_setzero_si128 (),
3642 (__mmask8) __U);
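/* Editorial usage sketch (assumes -mavx512vl): the masked add/sub forms
   compute the full element-wise result and then blend it under the write
   mask:

     __m256i a = _mm256_set1_epi32 (10);
     __m256i b = _mm256_set1_epi32 (1);
     __m256i r = _mm256_mask_add_epi32 (a, 0x0F, a, b);

   The low four lanes of r are 11; the high four lanes keep the value 10
   from the pass-through operand.  */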
3645 extern __inline __m256
3646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3647 _mm256_getexp_ps (__m256 __A)
3649 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3650 (__v8sf)
3651 _mm256_setzero_ps (),
3652 (__mmask8) -1);
3655 extern __inline __m256
3656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3659 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3660 (__v8sf) __W,
3661 (__mmask8) __U);
3664 extern __inline __m256
3665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3666 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3668 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3669 (__v8sf)
3670 _mm256_setzero_ps (),
3671 (__mmask8) __U);
3674 extern __inline __m256d
3675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3676 _mm256_getexp_pd (__m256d __A)
3678 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3679 (__v4df)
3680 _mm256_setzero_pd (),
3681 (__mmask8) -1);
3684 extern __inline __m256d
3685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3686 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3688 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3689 (__v4df) __W,
3690 (__mmask8) __U);
3693 extern __inline __m256d
3694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3695 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3697 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3698 (__v4df)
3699 _mm256_setzero_pd (),
3700 (__mmask8) __U);
3703 extern __inline __m128
3704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3705 _mm_getexp_ps (__m128 __A)
3707 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3708 (__v4sf)
3709 _mm_setzero_ps (),
3710 (__mmask8) -1);
3713 extern __inline __m128
3714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3715 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3717 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3718 (__v4sf) __W,
3719 (__mmask8) __U);
3722 extern __inline __m128
3723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3724 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3726 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3727 (__v4sf)
3728 _mm_setzero_ps (),
3729 (__mmask8) __U);
3732 extern __inline __m128d
3733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3734 _mm_getexp_pd (__m128d __A)
3736 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3737 (__v2df)
3738 _mm_setzero_pd (),
3739 (__mmask8) -1);
3742 extern __inline __m128d
3743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3744 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3746 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3747 (__v2df) __W,
3748 (__mmask8) __U);
3751 extern __inline __m128d
3752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3753 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3755 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3756 (__v2df)
3757 _mm_setzero_pd (),
3758 (__mmask8) __U);
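/* Editorial note (sketch): VGETEXPPS/VGETEXPPD return the unbiased exponent
   of each element as a floating-point value, i.e. floor(log2(|x|)) for
   normal inputs; applied to 8.0 the result is 3.0, and applied to 0.75 the
   result is -1.0.  */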
3761 extern __inline __m256i
3762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3763 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3764 __m128i __B)
3766 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3767 (__v4si) __B,
3768 (__v8si) __W,
3769 (__mmask8) __U);
3772 extern __inline __m256i
3773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3774 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3776 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3777 (__v4si) __B,
3778 (__v8si)
3779 _mm256_setzero_si256 (),
3780 (__mmask8) __U);
3783 extern __inline __m128i
3784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3785 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3786 __m128i __B)
3788 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3789 (__v4si) __B,
3790 (__v4si) __W,
3791 (__mmask8) __U);
3794 extern __inline __m128i
3795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3796 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3798 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3799 (__v4si) __B,
3800 (__v4si)
3801 _mm_setzero_si128 (),
3802 (__mmask8) __U);
3805 extern __inline __m256i
3806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3807 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3808 __m128i __B)
3810 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3811 (__v2di) __B,
3812 (__v4di) __W,
3813 (__mmask8) __U);
3816 extern __inline __m256i
3817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3818 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3820 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3821 (__v2di) __B,
3822 (__v4di)
3823 _mm256_setzero_si256 (),
3824 (__mmask8) __U);
3827 extern __inline __m128i
3828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3829 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3830 __m128i __B)
3832 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3833 (__v2di) __B,
3834 (__v2di) __W,
3835 (__mmask8) __U);
3838 extern __inline __m128i
3839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3840 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3842 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3843 (__v2di) __B,
3844 (__v2di)
3845 _mm_setzero_di (),
3846 (__mmask8) __U);
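/* Usage sketch for the masked logical right-shift intrinsics above
   (assuming an -mavx512vl build; variables are illustrative).  The shift
   count is taken from the low 64 bits of the __m128i count operand, and
   the _maskz forms zero the inactive lanes, so R below is
   { 0x8, 0, 0x8, 0 }:

     __m128i v   = _mm_set1_epi32 (0x80);
     __m128i cnt = _mm_cvtsi32_si128 (4);
     __m128i r   = _mm_maskz_srl_epi32 (0x5, v, cnt);
*/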
3849 extern __inline __m256i
3850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3851 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3852 __m256i __B)
3854 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3855 (__v8si) __B,
3856 (__v8si) __W,
3857 (__mmask8) __U);
3860 extern __inline __m256i
3861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3862 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3864 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3865 (__v8si) __B,
3866 (__v8si)
3867 _mm256_setzero_si256 (),
3868 (__mmask8) __U);
3871 extern __inline __m256d
3872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3873 _mm256_scalef_pd (__m256d __A, __m256d __B)
3875 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3876 (__v4df) __B,
3877 (__v4df)
3878 _mm256_setzero_pd (),
3879 (__mmask8) -1);
3882 extern __inline __m256d
3883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3884 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3885 __m256d __B)
3887 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3888 (__v4df) __B,
3889 (__v4df) __W,
3890 (__mmask8) __U);
3893 extern __inline __m256d
3894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3895 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3897 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3898 (__v4df) __B,
3899 (__v4df)
3900 _mm256_setzero_pd (),
3901 (__mmask8) __U);
3904 extern __inline __m256
3905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3906 _mm256_scalef_ps (__m256 __A, __m256 __B)
3908 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3909 (__v8sf) __B,
3910 (__v8sf)
3911 _mm256_setzero_ps (),
3912 (__mmask8) -1);
3915 extern __inline __m256
3916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3917 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3918 __m256 __B)
3920 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3921 (__v8sf) __B,
3922 (__v8sf) __W,
3923 (__mmask8) __U);
3926 extern __inline __m256
3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3928 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3930 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3931 (__v8sf) __B,
3932 (__v8sf)
3933 _mm256_setzero_ps (),
3934 (__mmask8) __U);
3937 extern __inline __m128d
3938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3939 _mm_scalef_pd (__m128d __A, __m128d __B)
3941 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3942 (__v2df) __B,
3943 (__v2df)
3944 _mm_setzero_pd (),
3945 (__mmask8) -1);
3948 extern __inline __m128d
3949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3950 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3951 __m128d __B)
3953 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3954 (__v2df) __B,
3955 (__v2df) __W,
3956 (__mmask8) __U);
3959 extern __inline __m128d
3960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3961 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3963 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3964 (__v2df) __B,
3965 (__v2df)
3966 _mm_setzero_pd (),
3967 (__mmask8) __U);
3970 extern __inline __m128
3971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3972 _mm_scalef_ps (__m128 __A, __m128 __B)
3974 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3975 (__v4sf) __B,
3976 (__v4sf)
3977 _mm_setzero_ps (),
3978 (__mmask8) -1);
3981 extern __inline __m128
3982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3983 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3985 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3986 (__v4sf) __B,
3987 (__v4sf) __W,
3988 (__mmask8) __U);
3991 extern __inline __m128
3992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3993 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3995 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3996 (__v4sf) __B,
3997 (__v4sf)
3998 _mm_setzero_ps (),
3999 (__mmask8) __U);
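/* Usage sketch for the scalef intrinsics above (assuming an -mavx512vl
   build; variables are illustrative).  scalef computes A * 2^floor(B)
   per element; the _mask forms merge inactive lanes from __W and the
   _maskz forms zero them.  Both lanes of R below are 3.0 * 2^2 = 12.0:

     __m128d a = _mm_set1_pd (3.0);
     __m128d b = _mm_set1_pd (2.0);
     __m128d r = _mm_scalef_pd (a, b);
*/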
4002 extern __inline __m256d
4003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4004 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4005 __m256d __C)
4007 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4008 (__v4df) __B,
4009 (__v4df) __C,
4010 (__mmask8) __U);
4013 extern __inline __m256d
4014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4015 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4016 __mmask8 __U)
4018 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4019 (__v4df) __B,
4020 (__v4df) __C,
4021 (__mmask8) __U);
4024 extern __inline __m256d
4025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4026 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4027 __m256d __C)
4029 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4030 (__v4df) __B,
4031 (__v4df) __C,
4032 (__mmask8) __U);
4035 extern __inline __m128d
4036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4037 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4039 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4040 (__v2df) __B,
4041 (__v2df) __C,
4042 (__mmask8) __U);
4045 extern __inline __m128d
4046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4047 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4048 __mmask8 __U)
4050 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4051 (__v2df) __B,
4052 (__v2df) __C,
4053 (__mmask8) __U);
4056 extern __inline __m128d
4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4059 __m128d __C)
4061 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4062 (__v2df) __B,
4063 (__v2df) __C,
4064 (__mmask8) __U);
4067 extern __inline __m256
4068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4069 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4071 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4072 (__v8sf) __B,
4073 (__v8sf) __C,
4074 (__mmask8) __U);
4077 extern __inline __m256
4078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4079 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4080 __mmask8 __U)
4082 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4083 (__v8sf) __B,
4084 (__v8sf) __C,
4085 (__mmask8) __U);
4088 extern __inline __m256
4089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4090 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4091 __m256 __C)
4093 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4094 (__v8sf) __B,
4095 (__v8sf) __C,
4096 (__mmask8) __U);
4099 extern __inline __m128
4100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4101 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4103 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4104 (__v4sf) __B,
4105 (__v4sf) __C,
4106 (__mmask8) __U);
4109 extern __inline __m128
4110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4113 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4114 (__v4sf) __B,
4115 (__v4sf) __C,
4116 (__mmask8) __U);
4119 extern __inline __m128
4120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4121 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4123 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4124 (__v4sf) __B,
4125 (__v4sf) __C,
4126 (__mmask8) __U);
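/* Usage sketch for the masked FMA intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  The _mask form keeps
   the first operand (__A) in inactive lanes, _mask3 keeps the third
   operand (__C), and _maskz zeroes them.  With the mask 0x5, lanes 0
   and 2 of R below are 2*3+1 = 7.0 and lanes 1 and 3 keep 2.0 from A:

     __m256d a = _mm256_set1_pd (2.0);
     __m256d b = _mm256_set1_pd (3.0);
     __m256d c = _mm256_set1_pd (1.0);
     __m256d r = _mm256_mask_fmadd_pd (a, 0x5, b, c);
*/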
4129 extern __inline __m256d
4130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4131 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4132 __m256d __C)
4134 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4135 (__v4df) __B,
4136 -(__v4df) __C,
4137 (__mmask8) __U);
4140 extern __inline __m256d
4141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4142 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4143 __mmask8 __U)
4145 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4146 (__v4df) __B,
4147 (__v4df) __C,
4148 (__mmask8) __U);
4151 extern __inline __m256d
4152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4153 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4154 __m256d __C)
4156 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4157 (__v4df) __B,
4158 -(__v4df) __C,
4159 (__mmask8) __U);
4162 extern __inline __m128d
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4166 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4167 (__v2df) __B,
4168 -(__v2df) __C,
4169 (__mmask8) __U);
4172 extern __inline __m128d
4173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4174 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4175 __mmask8 __U)
4177 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4178 (__v2df) __B,
4179 (__v2df) __C,
4180 (__mmask8) __U);
4183 extern __inline __m128d
4184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4185 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4186 __m128d __C)
4188 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4189 (__v2df) __B,
4190 -(__v2df) __C,
4191 (__mmask8) __U);
4194 extern __inline __m256
4195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4198 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4199 (__v8sf) __B,
4200 -(__v8sf) __C,
4201 (__mmask8) __U);
4204 extern __inline __m256
4205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4207 __mmask8 __U)
4209 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4210 (__v8sf) __B,
4211 (__v8sf) __C,
4212 (__mmask8) __U);
4215 extern __inline __m256
4216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4218 __m256 __C)
4220 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4221 (__v8sf) __B,
4222 -(__v8sf) __C,
4223 (__mmask8) __U);
4226 extern __inline __m128
4227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4228 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4230 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4231 (__v4sf) __B,
4232 -(__v4sf) __C,
4233 (__mmask8) __U);
4236 extern __inline __m128
4237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4238 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4240 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4241 (__v4sf) __B,
4242 (__v4sf) __C,
4243 (__mmask8) __U);
4246 extern __inline __m128
4247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4248 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4250 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4251 (__v4sf) __B,
4252 -(__v4sf) __C,
4253 (__mmask8) __U);
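/* Note on the fmsub forms above: the _mask and _maskz variants negate
   __C and reuse the vfmadd builtins, whereas the _mask3 variants call
   dedicated vfmsub builtins because __C doubles as the pass-through
   operand there and must not be negated in the inactive lanes.  A small
   sketch (assuming an -mavx512vl build; variables are illustrative) in
   which lanes 0-3 of R become 2*3-1 = 5.0f and lanes 4-7 keep 1.0f
   from C:

     __m256 a = _mm256_set1_ps (2.0f);
     __m256 b = _mm256_set1_ps (3.0f);
     __m256 c = _mm256_set1_ps (1.0f);
     __m256 r = _mm256_mask3_fmsub_ps (a, b, c, 0x0F);
*/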
4256 extern __inline __m256d
4257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4258 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4259 __m256d __C)
4261 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4262 (__v4df) __B,
4263 (__v4df) __C,
4264 (__mmask8) __U);
4267 extern __inline __m256d
4268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4270 __mmask8 __U)
4272 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4273 (__v4df) __B,
4274 (__v4df) __C,
4275 (__mmask8)
4276 __U);
4279 extern __inline __m256d
4280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4281 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4282 __m256d __C)
4284 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4285 (__v4df) __B,
4286 (__v4df) __C,
4287 (__mmask8)
4288 __U);
4291 extern __inline __m128d
4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4294 __m128d __C)
4296 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4297 (__v2df) __B,
4298 (__v2df) __C,
4299 (__mmask8) __U);
4302 extern __inline __m128d
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4305 __mmask8 __U)
4307 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4308 (__v2df) __B,
4309 (__v2df) __C,
4310 (__mmask8)
4311 __U);
4314 extern __inline __m128d
4315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4317 __m128d __C)
4319 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4320 (__v2df) __B,
4321 (__v2df) __C,
4322 (__mmask8)
4323 __U);
4326 extern __inline __m256
4327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4328 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4329 __m256 __C)
4331 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4332 (__v8sf) __B,
4333 (__v8sf) __C,
4334 (__mmask8) __U);
4337 extern __inline __m256
4338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4339 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4340 __mmask8 __U)
4342 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4343 (__v8sf) __B,
4344 (__v8sf) __C,
4345 (__mmask8) __U);
4348 extern __inline __m256
4349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4350 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4351 __m256 __C)
4353 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4354 (__v8sf) __B,
4355 (__v8sf) __C,
4356 (__mmask8) __U);
4359 extern __inline __m128
4360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4361 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4363 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4364 (__v4sf) __B,
4365 (__v4sf) __C,
4366 (__mmask8) __U);
4369 extern __inline __m128
4370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4371 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4372 __mmask8 __U)
4374 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4375 (__v4sf) __B,
4376 (__v4sf) __C,
4377 (__mmask8) __U);
4380 extern __inline __m128
4381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4382 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4383 __m128 __C)
4385 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4386 (__v4sf) __B,
4387 (__v4sf) __C,
4388 (__mmask8) __U);
4391 extern __inline __m256d
4392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4393 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4394 __m256d __C)
4396 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4397 (__v4df) __B,
4398 -(__v4df) __C,
4399 (__mmask8) __U);
4402 extern __inline __m256d
4403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4404 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4405 __mmask8 __U)
4407 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4408 (__v4df) __B,
4409 (__v4df) __C,
4410 (__mmask8)
4411 __U);
4414 extern __inline __m256d
4415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4416 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4417 __m256d __C)
4419 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4420 (__v4df) __B,
4421 -(__v4df) __C,
4422 (__mmask8)
4423 __U);
4426 extern __inline __m128d
4427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4429 __m128d __C)
4431 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4432 (__v2df) __B,
4433 -(__v2df) __C,
4434 (__mmask8) __U);
4437 extern __inline __m128d
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4440 __mmask8 __U)
4442 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4443 (__v2df) __B,
4444 (__v2df) __C,
4445 (__mmask8)
4446 __U);
4449 extern __inline __m128d
4450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4451 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4452 __m128d __C)
4454 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4455 (__v2df) __B,
4456 -(__v2df) __C,
4457 (__mmask8)
4458 __U);
4461 extern __inline __m256
4462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4463 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4464 __m256 __C)
4466 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4467 (__v8sf) __B,
4468 -(__v8sf) __C,
4469 (__mmask8) __U);
4472 extern __inline __m256
4473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4474 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4475 __mmask8 __U)
4477 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4478 (__v8sf) __B,
4479 (__v8sf) __C,
4480 (__mmask8) __U);
4483 extern __inline __m256
4484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4485 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4486 __m256 __C)
4488 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4489 (__v8sf) __B,
4490 -(__v8sf) __C,
4491 (__mmask8) __U);
4494 extern __inline __m128
4495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4496 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4498 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4499 (__v4sf) __B,
4500 -(__v4sf) __C,
4501 (__mmask8) __U);
4504 extern __inline __m128
4505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4506 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4507 __mmask8 __U)
4509 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4510 (__v4sf) __B,
4511 (__v4sf) __C,
4512 (__mmask8) __U);
4515 extern __inline __m128
4516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4517 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4518 __m128 __C)
4520 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4521 (__v4sf) __B,
4522 -(__v4sf) __C,
4523 (__mmask8) __U);
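/* Usage sketch for the fmaddsub/fmsubadd intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  fmaddsub subtracts __C
   in even-indexed lanes and adds it in odd-indexed lanes; fmsubadd does
   the opposite.  Below, R0 is { 5.0, 7.0 } and R1 is { 7.0, 5.0 }:

     __m128d a  = _mm_set1_pd (2.0);
     __m128d b  = _mm_set1_pd (3.0);
     __m128d c  = _mm_set1_pd (1.0);
     __m128d r0 = _mm_maskz_fmaddsub_pd (0x3, a, b, c);
     __m128d r1 = _mm_maskz_fmsubadd_pd (0x3, a, b, c);
*/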
4526 extern __inline __m256d
4527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4529 __m256d __C)
4531 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4532 (__v4df) __B,
4533 (__v4df) __C,
4534 (__mmask8) __U);
4537 extern __inline __m256d
4538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4539 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4540 __mmask8 __U)
4542 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4543 (__v4df) __B,
4544 (__v4df) __C,
4545 (__mmask8) __U);
4548 extern __inline __m256d
4549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4550 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4551 __m256d __C)
4553 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4554 (__v4df) __B,
4555 (__v4df) __C,
4556 (__mmask8) __U);
4559 extern __inline __m128d
4560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4561 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4562 __m128d __C)
4564 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4565 (__v2df) __B,
4566 (__v2df) __C,
4567 (__mmask8) __U);
4570 extern __inline __m128d
4571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4572 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4573 __mmask8 __U)
4575 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4576 (__v2df) __B,
4577 (__v2df) __C,
4578 (__mmask8) __U);
4581 extern __inline __m128d
4582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4583 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4584 __m128d __C)
4586 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4587 (__v2df) __B,
4588 (__v2df) __C,
4589 (__mmask8) __U);
4592 extern __inline __m256
4593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4594 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4595 __m256 __C)
4597 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4598 (__v8sf) __B,
4599 (__v8sf) __C,
4600 (__mmask8) __U);
4603 extern __inline __m256
4604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4606 __mmask8 __U)
4608 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4609 (__v8sf) __B,
4610 (__v8sf) __C,
4611 (__mmask8) __U);
4614 extern __inline __m256
4615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4616 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4617 __m256 __C)
4619 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4620 (__v8sf) __B,
4621 (__v8sf) __C,
4622 (__mmask8) __U);
4625 extern __inline __m128
4626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4627 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4629 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4630 (__v4sf) __B,
4631 (__v4sf) __C,
4632 (__mmask8) __U);
4635 extern __inline __m128
4636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4637 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4639 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4640 (__v4sf) __B,
4641 (__v4sf) __C,
4642 (__mmask8) __U);
4645 extern __inline __m128
4646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4647 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4649 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4650 (__v4sf) __B,
4651 (__v4sf) __C,
4652 (__mmask8) __U);
4655 extern __inline __m256d
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4658 __m256d __C)
4660 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4661 (__v4df) __B,
4662 (__v4df) __C,
4663 (__mmask8) __U);
4666 extern __inline __m256d
4667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4669 __mmask8 __U)
4671 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4672 (__v4df) __B,
4673 (__v4df) __C,
4674 (__mmask8) __U);
4677 extern __inline __m256d
4678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4679 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4680 __m256d __C)
4682 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4683 (__v4df) __B,
4684 -(__v4df) __C,
4685 (__mmask8) __U);
4688 extern __inline __m128d
4689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4690 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4691 __m128d __C)
4693 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4694 (__v2df) __B,
4695 (__v2df) __C,
4696 (__mmask8) __U);
4699 extern __inline __m128d
4700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4702 __mmask8 __U)
4704 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4705 (__v2df) __B,
4706 (__v2df) __C,
4707 (__mmask8) __U);
4710 extern __inline __m128d
4711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4712 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4713 __m128d __C)
4715 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4716 (__v2df) __B,
4717 -(__v2df) __C,
4718 (__mmask8) __U);
4721 extern __inline __m256
4722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4723 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4724 __m256 __C)
4726 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4727 (__v8sf) __B,
4728 (__v8sf) __C,
4729 (__mmask8) __U);
4732 extern __inline __m256
4733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4734 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4735 __mmask8 __U)
4737 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4738 (__v8sf) __B,
4739 (__v8sf) __C,
4740 (__mmask8) __U);
4743 extern __inline __m256
4744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4745 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4746 __m256 __C)
4748 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4749 (__v8sf) __B,
4750 -(__v8sf) __C,
4751 (__mmask8) __U);
4754 extern __inline __m128
4755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4756 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4758 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4759 (__v4sf) __B,
4760 (__v4sf) __C,
4761 (__mmask8) __U);
4764 extern __inline __m128
4765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4766 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4768 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4769 (__v4sf) __B,
4770 (__v4sf) __C,
4771 (__mmask8) __U);
4774 extern __inline __m128
4775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4776 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4778 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4779 (__v4sf) __B,
4780 -(__v4sf) __C,
4781 (__mmask8) __U);
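/* Usage sketch for the fnmadd/fnmsub intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  fnmadd computes
   -(A*B) + C and fnmsub computes -(A*B) - C, with the usual merge/zero
   masking.  Every lane of R0 below is -5.0f and every lane of R1 is
   -7.0f:

     __m128 a  = _mm_set1_ps (2.0f);
     __m128 b  = _mm_set1_ps (3.0f);
     __m128 c  = _mm_set1_ps (1.0f);
     __m128 r0 = _mm_maskz_fnmadd_ps (0xF, a, b, c);
     __m128 r1 = _mm_maskz_fnmsub_ps (0xF, a, b, c);
*/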
4784 extern __inline __m128i
4785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4786 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4787 __m128i __B)
4789 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4790 (__v4si) __B,
4791 (__v4si) __W,
4792 (__mmask8) __U);
4795 extern __inline __m128i
4796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4797 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4799 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4800 (__v4si) __B,
4801 (__v4si)
4802 _mm_setzero_si128 (),
4803 (__mmask8) __U);
4806 extern __inline __m256i
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4809 __m256i __B)
4811 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4812 (__v8si) __B,
4813 (__v8si) __W,
4814 (__mmask8) __U);
4817 extern __inline __m256i
4818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4819 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4821 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4822 (__v8si) __B,
4823 (__v8si)
4824 _mm256_setzero_si256 (),
4825 (__mmask8) __U);
4828 extern __inline __m128i
4829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4830 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4831 __m128i __B)
4833 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4834 (__v4si) __B,
4835 (__v4si) __W,
4836 (__mmask8) __U);
4839 extern __inline __m128i
4840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4841 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4843 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4844 (__v4si) __B,
4845 (__v4si)
4846 _mm_setzero_si128 (),
4847 (__mmask8) __U);
4850 extern __inline __m256i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4853 __m256i __B)
4855 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4856 (__v8si) __B,
4857 (__v8si) __W,
4858 (__mmask8) __U);
4861 extern __inline __m256i
4862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4863 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4865 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4866 (__v8si) __B,
4867 (__v8si)
4868 _mm256_setzero_si256 (),
4869 (__mmask8) __U);
4872 extern __inline __m128i
4873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4874 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4876 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4877 (__v4si) __B,
4878 (__v4si) __W,
4879 (__mmask8) __U);
4882 extern __inline __m128i
4883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4884 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4886 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4887 (__v4si) __B,
4888 (__v4si)
4889 _mm_setzero_si128 (),
4890 (__mmask8) __U);
4893 extern __inline __m256i
4894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4895 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4896 __m256i __B)
4898 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4899 (__v8si) __B,
4900 (__v8si) __W,
4901 (__mmask8) __U);
4904 extern __inline __m256i
4905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4906 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4908 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4909 (__v8si) __B,
4910 (__v8si)
4911 _mm256_setzero_si256 (),
4912 (__mmask8) __U);
4915 extern __inline __m128i
4916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4917 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4918 __m128i __B)
4920 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4921 (__v4si) __B,
4922 (__v4si) __W,
4923 (__mmask8) __U);
4926 extern __inline __m128i
4927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4928 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4930 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4931 (__v4si) __B,
4932 (__v4si)
4933 _mm_setzero_si128 (),
4934 (__mmask8) __U);
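/* Usage sketch for the masked bitwise intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  andnot computes
   (~__A) & __B per 32-bit lane, so every lane of R below is
   (~0x0F) & 0xFF = 0xF0:

     __m128i a = _mm_set1_epi32 (0x0F);
     __m128i b = _mm_set1_epi32 (0xFF);
     __m128i r = _mm_maskz_andnot_epi32 (0xF, a, b);
*/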
4937 extern __inline __m128
4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4939 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4941 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4942 (__v4sf) __W,
4943 (__mmask8) __U);
4946 extern __inline __m128
4947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4948 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4950 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4951 (__v4sf)
4952 _mm_setzero_ps (),
4953 (__mmask8) __U);
4956 extern __inline __m128
4957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4960 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4961 (__v4sf) __W,
4962 (__mmask8) __U);
4965 extern __inline __m128
4966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4967 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4969 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4970 (__v4sf)
4971 _mm_setzero_ps (),
4972 (__mmask8) __U);
4975 extern __inline __m256i
4976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4979 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4980 (__v8si) __W,
4981 (__mmask8) __U);
4984 extern __inline __m256i
4985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4986 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4988 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4989 (__v8si)
4990 _mm256_setzero_si256 (),
4991 (__mmask8) __U);
4994 extern __inline __m128i
4995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4998 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4999 (__v4si) __W,
5000 (__mmask8) __U);
5003 extern __inline __m128i
5004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5005 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5007 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5008 (__v4si)
5009 _mm_setzero_si128 (),
5010 (__mmask8) __U);
5013 extern __inline __m256i
5014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015 _mm256_cvtps_epu32 (__m256 __A)
5017 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5018 (__v8si)
5019 _mm256_setzero_si256 (),
5020 (__mmask8) -1);
5023 extern __inline __m256i
5024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5027 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5028 (__v8si) __W,
5029 (__mmask8) __U);
5032 extern __inline __m256i
5033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5036 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5037 (__v8si)
5038 _mm256_setzero_si256 (),
5039 (__mmask8) __U);
5042 extern __inline __m128i
5043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044 _mm_cvtps_epu32 (__m128 __A)
5046 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5047 (__v4si)
5048 _mm_setzero_si128 (),
5049 (__mmask8) -1);
5052 extern __inline __m128i
5053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5056 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5057 (__v4si) __W,
5058 (__mmask8) __U);
5061 extern __inline __m128i
5062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5065 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5066 (__v4si)
5067 _mm_setzero_si128 (),
5068 (__mmask8) __U);
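/* Usage sketch for the conversion intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  cvtps_epi32 and
   cvtps_epu32 convert to signed/unsigned 32-bit integers using the
   current rounding mode; cvtpd_ps narrows doubles to floats in an
   __m128.  Below, S is { 1, 2, 0, 0 } (mask 0x3) and U is
   { 1, 2, 3, 4 }:

     __m128  f = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
     __m128i s = _mm_maskz_cvtps_epi32 (0x3, f);
     __m128i u = _mm_cvtps_epu32 (f);
*/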
5071 extern __inline __m256d
5072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5075 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5076 (__v4df) __W,
5077 (__mmask8) __U);
5080 extern __inline __m256d
5081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5082 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5084 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5085 (__v4df)
5086 _mm256_setzero_pd (),
5087 (__mmask8) __U);
5090 extern __inline __m128d
5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5094 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5095 (__v2df) __W,
5096 (__mmask8) __U);
5099 extern __inline __m128d
5100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5103 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5104 (__v2df)
5105 _mm_setzero_pd (),
5106 (__mmask8) __U);
5109 extern __inline __m256
5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5113 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5114 (__v8sf) __W,
5115 (__mmask8) __U);
5118 extern __inline __m256
5119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5120 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5122 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5123 (__v8sf)
5124 _mm256_setzero_ps (),
5125 (__mmask8) __U);
5128 extern __inline __m128
5129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5132 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5133 (__v4sf) __W,
5134 (__mmask8) __U);
5137 extern __inline __m128
5138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5141 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5142 (__v4sf)
5143 _mm_setzero_ps (),
5144 (__mmask8) __U);
5147 extern __inline __m256
5148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5151 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5152 (__v8sf) __W,
5153 (__mmask8) __U);
5156 extern __inline __m256
5157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5158 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5160 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5161 (__v8sf)
5162 _mm256_setzero_ps (),
5163 (__mmask8) __U);
5166 extern __inline __m128
5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5170 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5171 (__v4sf) __W,
5172 (__mmask8) __U);
5175 extern __inline __m128
5176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5177 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5179 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5180 (__v4sf)
5181 _mm_setzero_ps (),
5182 (__mmask8) __U);
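/* Usage sketch for the masked duplicate-move intrinsics above (assuming
   an -mavx512vl build; variables are illustrative).  moveldup copies the
   even-indexed floats into the odd lanes, movehdup copies the odd-indexed
   floats into the even lanes, and movedup_pd duplicates element 0 of each
   128-bit half.  With V holding { 1, 2, 3, 4 }, LO is { 1, 1, 3, 3 } and
   HI is { 2, 2, 4, 4 }:

     __m128 v  = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
     __m128 lo = _mm_maskz_moveldup_ps (0xF, v);
     __m128 hi = _mm_maskz_movehdup_ps (0xF, v);
*/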
5185 extern __inline __m128i
5186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5188 __m128i __B)
5190 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5191 (__v4si) __B,
5192 (__v4si) __W,
5193 (__mmask8) __U);
5196 extern __inline __m128i
5197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5200 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5201 (__v4si) __B,
5202 (__v4si)
5203 _mm_setzero_si128 (),
5204 (__mmask8) __U);
5207 extern __inline __m256i
5208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5209 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5210 __m256i __B)
5212 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5213 (__v8si) __B,
5214 (__v8si) __W,
5215 (__mmask8) __U);
5218 extern __inline __m256i
5219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5220 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5222 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5223 (__v8si) __B,
5224 (__v8si)
5225 _mm256_setzero_si256 (),
5226 (__mmask8) __U);
5229 extern __inline __m128i
5230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5231 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5232 __m128i __B)
5234 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5235 (__v2di) __B,
5236 (__v2di) __W,
5237 (__mmask8) __U);
5240 extern __inline __m128i
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5244 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5245 (__v2di) __B,
5246 (__v2di)
5247 _mm_setzero_di (),
5248 (__mmask8) __U);
5251 extern __inline __m256i
5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5254 __m256i __B)
5256 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5257 (__v4di) __B,
5258 (__v4di) __W,
5259 (__mmask8) __U);
5262 extern __inline __m256i
5263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5264 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5266 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5267 (__v4di) __B,
5268 (__v4di)
5269 _mm256_setzero_si256 (),
5270 (__mmask8) __U);
5273 extern __inline __m128i
5274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5275 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5276 __m128i __B)
5278 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5279 (__v4si) __B,
5280 (__v4si) __W,
5281 (__mmask8) __U);
5284 extern __inline __m128i
5285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5286 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5288 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5289 (__v4si) __B,
5290 (__v4si)
5291 _mm_setzero_si128 (),
5292 (__mmask8) __U);
5295 extern __inline __m256i
5296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5297 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5298 __m256i __B)
5300 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5301 (__v8si) __B,
5302 (__v8si) __W,
5303 (__mmask8) __U);
5306 extern __inline __m256i
5307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5308 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5310 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5311 (__v8si) __B,
5312 (__v8si)
5313 _mm256_setzero_si256 (),
5314 (__mmask8) __U);
5317 extern __inline __m128i
5318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5319 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5320 __m128i __B)
5322 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5323 (__v2di) __B,
5324 (__v2di) __W,
5325 (__mmask8) __U);
5328 extern __inline __m128i
5329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5330 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5332 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5333 (__v2di) __B,
5334 (__v2di)
5335 _mm_setzero_di (),
5336 (__mmask8) __U);
5339 extern __inline __m256i
5340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5341 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5342 __m256i __B)
5344 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5345 (__v4di) __B,
5346 (__v4di) __W,
5347 (__mmask8) __U);
5350 extern __inline __m256i
5351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5352 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5354 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5355 (__v4di) __B,
5356 (__v4di)
5357 _mm256_setzero_si256 (),
5358 (__mmask8) __U);
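/* Usage sketch for the masked unpack intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  unpacklo interleaves
   the low halves of the two sources and unpackhi the high halves, so LO
   below is { 0, 4, 1, 5 } and HI is { 2, 6, 3, 7 }:

     __m128i a  = _mm_setr_epi32 (0, 1, 2, 3);
     __m128i b  = _mm_setr_epi32 (4, 5, 6, 7);
     __m128i lo = _mm_maskz_unpacklo_epi32 (0xF, a, b);
     __m128i hi = _mm_maskz_unpackhi_epi32 (0xF, a, b);
*/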
5361 extern __inline __mmask8
5362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5363 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5365 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5366 (__v4si) __B, 0,
5367 (__mmask8) -1);
5370 extern __inline __mmask8
5371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5374 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5375 (__v4si) __B,
5376 (__mmask8) -1);
5379 extern __inline __mmask8
5380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5383 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5384 (__v4si) __B, 0, __U);
5387 extern __inline __mmask8
5388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5389 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5391 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5392 (__v4si) __B, __U);
5395 extern __inline __mmask8
5396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5397 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5399 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5400 (__v8si) __B, 0,
5401 (__mmask8) -1);
5404 extern __inline __mmask8
5405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5406 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5408 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5409 (__v8si) __B,
5410 (__mmask8) -1);
5413 extern __inline __mmask8
5414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5417 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5418 (__v8si) __B, 0, __U);
5421 extern __inline __mmask8
5422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5423 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5425 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5426 (__v8si) __B, __U);
5429 extern __inline __mmask8
5430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5431 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5433 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5434 (__v2di) __B, 0,
5435 (__mmask8) -1);
5438 extern __inline __mmask8
5439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5440 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5442 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5443 (__v2di) __B,
5444 (__mmask8) -1);
5447 extern __inline __mmask8
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5451 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5452 (__v2di) __B, 0, __U);
5455 extern __inline __mmask8
5456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5457 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5459 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5460 (__v2di) __B, __U);
5463 extern __inline __mmask8
5464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5465 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5467 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5468 (__v4di) __B, 0,
5469 (__mmask8) -1);
5472 extern __inline __mmask8
5473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5476 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5477 (__v4di) __B,
5478 (__mmask8) -1);
5481 extern __inline __mmask8
5482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5485 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5486 (__v4di) __B, 0, __U);
5489 extern __inline __mmask8
5490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5491 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5493 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5494 (__v4di) __B, __U);
5497 extern __inline __mmask8
5498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5499 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5501 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5502 (__v4si) __B, 6,
5503 (__mmask8) -1);
5506 extern __inline __mmask8
5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5510 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5511 (__v4si) __B,
5512 (__mmask8) -1);
5515 extern __inline __mmask8
5516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5517 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5519 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5520 (__v4si) __B, 6, __U);
5523 extern __inline __mmask8
5524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5525 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5527 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5528 (__v4si) __B, __U);
5531 extern __inline __mmask8
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5535 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5536 (__v8si) __B, 6,
5537 (__mmask8) -1);
5540 extern __inline __mmask8
5541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5544 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5545 (__v8si) __B,
5546 (__mmask8) -1);
5549 extern __inline __mmask8
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5553 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5554 (__v8si) __B, 6, __U);
5557 extern __inline __mmask8
5558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5559 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5561 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5562 (__v8si) __B, __U);
5565 extern __inline __mmask8
5566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5567 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5569 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5570 (__v2di) __B, 6,
5571 (__mmask8) -1);
5574 extern __inline __mmask8
5575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5576 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5578 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5579 (__v2di) __B,
5580 (__mmask8) -1);
5583 extern __inline __mmask8
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5587 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5588 (__v2di) __B, 6, __U);
5591 extern __inline __mmask8
5592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5593 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5595 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5596 (__v2di) __B, __U);
5599 extern __inline __mmask8
5600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5601 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5603 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5604 (__v4di) __B, 6,
5605 (__mmask8) -1);
5608 extern __inline __mmask8
5609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5612 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5613 (__v4di) __B,
5614 (__mmask8) -1);
5617 extern __inline __mmask8
5618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5619 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5621 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5622 (__v4di) __B, 6, __U);
5625 extern __inline __mmask8
5626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5629 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5630 (__v4di) __B, __U);
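/* Usage sketch for the compare-to-mask intrinsics above (assuming an
   -mavx512vl build; variables are illustrative).  The signed forms map
   to pcmpeq/pcmpgt builtins, while the unsigned forms go through the
   ucmp builtins with an immediate predicate (0 for equal, 6 for
   greater-than, as used above).  Below, M is 0xC: only lanes 2 and 3
   satisfy a > b:

     __m128i  a = _mm_setr_epi32 (1, 2, 3, 4);
     __m128i  b = _mm_setr_epi32 (4, 3, 2, 1);
     __mmask8 m = _mm_cmpgt_epi32_mask (a, b);
*/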
5633 extern __inline __mmask8
5634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5635 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5637 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5638 (__v4si) __B,
5639 (__mmask8) -1);
5642 extern __inline __mmask8
5643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5644 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5646 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5647 (__v4si) __B, __U);
5650 extern __inline __mmask8
5651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5652 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5654 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5655 (__v8si) __B,
5656 (__mmask8) -1);
5659 extern __inline __mmask8
5660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5663 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5664 (__v8si) __B, __U);
5667 extern __inline __mmask8
5668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5669 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5671 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5672 (__v2di) __B,
5673 (__mmask8) -1);
5676 extern __inline __mmask8
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5680 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5681 (__v2di) __B, __U);
5684 extern __inline __mmask8
5685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5686 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5688 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5689 (__v4di) __B,
5690 (__mmask8) -1);
5693 extern __inline __mmask8
5694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5695 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5697 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5698 (__v4di) __B, __U);
5701 extern __inline __mmask8
5702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5703 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5705 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5706 (__v4si) __B,
5707 (__mmask8) -1);
5710 extern __inline __mmask8
5711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5714 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5715 (__v4si) __B, __U);
5718 extern __inline __mmask8
5719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5720 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5722 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5723 (__v8si) __B,
5724 (__mmask8) -1);
5727 extern __inline __mmask8
5728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5731 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5732 (__v8si) __B, __U);
5735 extern __inline __mmask8
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5739 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5740 (__v2di) __B,
5741 (__mmask8) -1);
5744 extern __inline __mmask8
5745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5748 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5749 (__v2di) __B, __U);
5752 extern __inline __mmask8
5753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5756 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5757 (__v4di) __B,
5758 (__mmask8) -1);
5761 extern __inline __mmask8
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5765 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5766 (__v4di) __B, __U);
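/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code that includes
   <immintrin.h> and is built with -mavx512vl.  It shows how the
   ptestm/ptestnm intrinsics above reduce a per-lane AND to a mask:
   _mm256_test_epi32_mask sets bit i when (A[i] & B[i]) != 0, while
   _mm256_testn_epi32_mask sets it when that AND is zero.  */
#if 0
#include <immintrin.h>

static __mmask8
nonzero_lanes (__m256i v)
{
  /* Bit i of the result is set when 32-bit lane i of V is non-zero,
     because testing V against itself computes (v[i] & v[i]) != 0.  */
  return _mm256_test_epi32_mask (v, v);
}
#endif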
5769 extern __inline __m256d
5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5773 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5774 (__v4df) __W,
5775 (__mmask8) __U);
5778 extern __inline __m256d
5779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5780 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5782 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5783 (__v4df)
5784 _mm256_setzero_pd (),
5785 (__mmask8) __U);
5788 extern __inline void
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5792 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5793 (__v4df) __A,
5794 (__mmask8) __U);
5797 extern __inline __m128d
5798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5801 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5802 (__v2df) __W,
5803 (__mmask8) __U);
5806 extern __inline __m128d
5807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5808 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5810 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5811 (__v2df)
5812 _mm_setzero_pd (),
5813 (__mmask8) __U);
5816 extern __inline void
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5820 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5821 (__v2df) __A,
5822 (__mmask8) __U);
5825 extern __inline __m256
5826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5827 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5829 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5830 (__v8sf) __W,
5831 (__mmask8) __U);
5834 extern __inline __m256
5835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5836 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5838 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5839 (__v8sf)
5840 _mm256_setzero_ps (),
5841 (__mmask8) __U);
5844 extern __inline void
5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5848 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5849 (__v8sf) __A,
5850 (__mmask8) __U);
5853 extern __inline __m128
5854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5855 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5857 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5858 (__v4sf) __W,
5859 (__mmask8) __U);
5862 extern __inline __m128
5863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5864 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5866 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5867 (__v4sf)
5868 _mm_setzero_ps (),
5869 (__mmask8) __U);
5872 extern __inline void
5873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5877 (__v4sf) __A,
5878 (__mmask8) __U);
5881 extern __inline __m256i
5882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5885 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5886 (__v4di) __W,
5887 (__mmask8) __U);
5890 extern __inline __m256i
5891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5892 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5894 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5895 (__v4di)
5896 _mm256_setzero_si256 (),
5897 (__mmask8) __U);
5900 extern __inline void
5901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5904 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5905 (__v4di) __A,
5906 (__mmask8) __U);
5909 extern __inline __m128i
5910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5911 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5913 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5914 (__v2di) __W,
5915 (__mmask8) __U);
5918 extern __inline __m128i
5919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5920 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5922 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5923 (__v2di)
5924 _mm_setzero_di (),
5925 (__mmask8) __U);
5928 extern __inline void
5929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5932 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5933 (__v2di) __A,
5934 (__mmask8) __U);
5937 extern __inline __m256i
5938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5939 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5941 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5942 (__v8si) __W,
5943 (__mmask8) __U);
5946 extern __inline __m256i
5947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5948 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5950 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5951 (__v8si)
5952 _mm256_setzero_si256 (),
5953 (__mmask8) __U);
5956 extern __inline void
5957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5960 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5961 (__v8si) __A,
5962 (__mmask8) __U);
5965 extern __inline __m128i
5966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5967 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5969 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5970 (__v4si) __W,
5971 (__mmask8) __U);
5974 extern __inline __m128i
5975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5976 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5978 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5979 (__v4si)
5980 _mm_setzero_si128 (),
5981 (__mmask8) __U);
5984 extern __inline void
5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5988 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5989 (__v4si) __A,
5990 (__mmask8) __U);
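/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code built with
   -mavx512vl.  Compress packs the lanes selected by the mask into
   the low elements of the result; compressstoreu writes only the
   selected lanes contiguously to unaligned memory.  */
#if 0
#include <immintrin.h>

static int
store_positive (const double *src, double *dst)
{
  __m256d  v = _mm256_loadu_pd (src);
  __mmask8 m = _mm256_cmp_pd_mask (v, _mm256_setzero_pd (), _CMP_GT_OQ);
  /* Write only the lanes selected by M, packed together at DST.  */
  _mm256_mask_compressstoreu_pd (dst, m, v);
  return __builtin_popcount (m);	/* number of doubles written */
}
#endif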
5993 extern __inline __m256d
5994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5995 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5997 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5998 (__v4df) __W,
5999 (__mmask8) __U);
6002 extern __inline __m256d
6003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6004 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6006 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6007 (__v4df)
6008 _mm256_setzero_pd (),
6009 (__mmask8) __U);
6012 extern __inline __m256d
6013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6016 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6017 (__v4df) __W,
6018 (__mmask8)
6019 __U);
6022 extern __inline __m256d
6023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6026 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6027 (__v4df)
6028 _mm256_setzero_pd (),
6029 (__mmask8)
6030 __U);
6033 extern __inline __m128d
6034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6035 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6037 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6038 (__v2df) __W,
6039 (__mmask8) __U);
6042 extern __inline __m128d
6043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6044 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6046 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6047 (__v2df)
6048 _mm_setzero_pd (),
6049 (__mmask8) __U);
6052 extern __inline __m128d
6053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6054 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6056 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6057 (__v2df) __W,
6058 (__mmask8)
6059 __U);
6062 extern __inline __m128d
6063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6064 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6066 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6067 (__v2df)
6068 _mm_setzero_pd (),
6069 (__mmask8)
6070 __U);
6073 extern __inline __m256
6074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6075 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6077 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6078 (__v8sf) __W,
6079 (__mmask8) __U);
6082 extern __inline __m256
6083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6086 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6087 (__v8sf)
6088 _mm256_setzero_ps (),
6089 (__mmask8) __U);
6092 extern __inline __m256
6093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6094 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6096 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6097 (__v8sf) __W,
6098 (__mmask8) __U);
6101 extern __inline __m256
6102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6103 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6105 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6106 (__v8sf)
6107 _mm256_setzero_ps (),
6108 (__mmask8)
6109 __U);
6112 extern __inline __m128
6113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6114 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6116 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6117 (__v4sf) __W,
6118 (__mmask8) __U);
6121 extern __inline __m128
6122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6123 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6125 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6126 (__v4sf)
6127 _mm_setzero_ps (),
6128 (__mmask8) __U);
6131 extern __inline __m128
6132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6133 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6135 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6136 (__v4sf) __W,
6137 (__mmask8) __U);
6140 extern __inline __m128
6141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6142 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6144 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6145 (__v4sf)
6146 _mm_setzero_ps (),
6147 (__mmask8)
6148 __U);
6151 extern __inline __m256i
6152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6153 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6155 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6156 (__v4di) __W,
6157 (__mmask8) __U);
6160 extern __inline __m256i
6161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6162 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6164 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6165 (__v4di)
6166 _mm256_setzero_si256 (),
6167 (__mmask8) __U);
6170 extern __inline __m256i
6171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6173 void const *__P)
6175 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6176 (__v4di) __W,
6177 (__mmask8)
6178 __U);
6181 extern __inline __m256i
6182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6183 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6185 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6186 (__v4di)
6187 _mm256_setzero_si256 (),
6188 (__mmask8)
6189 __U);
6192 extern __inline __m128i
6193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6194 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6196 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6197 (__v2di) __W,
6198 (__mmask8) __U);
6201 extern __inline __m128i
6202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6205 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6206 (__v2di)
6207 _mm_setzero_si128 (),
6208 (__mmask8) __U);
6211 extern __inline __m128i
6212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6215 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6216 (__v2di) __W,
6217 (__mmask8)
6218 __U);
6221 extern __inline __m128i
6222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6223 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6225 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6226 (__v2di)
6227 _mm_setzero_si128 (),
6228 (__mmask8)
6229 __U);
6232 extern __inline __m256i
6233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6234 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6236 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6237 (__v8si) __W,
6238 (__mmask8) __U);
6241 extern __inline __m256i
6242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6243 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6245 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6246 (__v8si)
6247 _mm256_setzero_si256 (),
6248 (__mmask8) __U);
6251 extern __inline __m256i
6252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6254 void const *__P)
6256 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6257 (__v8si) __W,
6258 (__mmask8)
6259 __U);
6262 extern __inline __m256i
6263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6264 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6266 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6267 (__v8si)
6268 _mm256_setzero_si256 (),
6269 (__mmask8)
6270 __U);
6273 extern __inline __m128i
6274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6275 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6277 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6278 (__v4si) __W,
6279 (__mmask8) __U);
6282 extern __inline __m128i
6283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6284 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6286 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6287 (__v4si)
6288 _mm_setzero_si128 (),
6289 (__mmask8) __U);
6292 extern __inline __m128i
6293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6296 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6297 (__v4si) __W,
6298 (__mmask8)
6299 __U);
6302 extern __inline __m128i
6303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6306 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6307 (__v4si)
6308 _mm_setzero_si128 (),
6309 (__mmask8)
6310 __U);
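/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code built with
   -mavx512vl.  Expand is the inverse of compress: consecutive source
   elements are placed, in order, into the destination lanes selected
   by the mask; expandloadu reads those packed elements straight from
   memory.  */
#if 0
#include <immintrin.h>

static __m256d
unpack_selected (const double *packed, __mmask8 m)
{
  /* Lanes selected by M receive packed[0], packed[1], ... in order;
     the remaining lanes are zeroed by the maskz form.  */
  return _mm256_maskz_expandloadu_pd (m, packed);
}
#endif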
6313 extern __inline __m256d
6314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6317 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6318 /* idx */ ,
6319 (__v4df) __A,
6320 (__v4df) __B,
6321                                                          (__mmask8) -1);
6325 extern __inline __m256d
6326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6327 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6328 __m256d __B)
6330 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6331 /* idx */ ,
6332 (__v4df) __A,
6333 (__v4df) __B,
6334 (__mmask8)
6335 __U);
6338 extern __inline __m256d
6339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6340 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6341 __m256d __B)
6343 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6344 (__v4di) __I
6345 /* idx */ ,
6346 (__v4df) __B,
6347 (__mmask8)
6348 __U);
6351 extern __inline __m256d
6352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6354 __m256d __B)
6356 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6357 /* idx */ ,
6358 (__v4df) __A,
6359 (__v4df) __B,
6360 (__mmask8)
6361 __U);
6364 extern __inline __m256
6365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6366 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6368 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6369 /* idx */ ,
6370 (__v8sf) __A,
6371 (__v8sf) __B,
6372 (__mmask8) -1);
6375 extern __inline __m256
6376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6377 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6378 __m256 __B)
6380 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6381 /* idx */ ,
6382 (__v8sf) __A,
6383 (__v8sf) __B,
6384 (__mmask8) __U);
6387 extern __inline __m256
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6390 __m256 __B)
6392 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6393 (__v8si) __I
6394 /* idx */ ,
6395 (__v8sf) __B,
6396 (__mmask8) __U);
6399 extern __inline __m256
6400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6402 __m256 __B)
6404 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6405 /* idx */ ,
6406 (__v8sf) __A,
6407 (__v8sf) __B,
6408 (__mmask8)
6409 __U);
6412 extern __inline __m128i
6413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6414 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6416 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6417 /* idx */ ,
6418 (__v2di) __A,
6419 (__v2di) __B,
6420 (__mmask8) -1);
6423 extern __inline __m128i
6424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6425 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6426 __m128i __B)
6428 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6429 /* idx */ ,
6430 (__v2di) __A,
6431 (__v2di) __B,
6432 (__mmask8) __U);
6435 extern __inline __m128i
6436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6437 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6438 __m128i __B)
6440 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6441 (__v2di) __I
6442 /* idx */ ,
6443 (__v2di) __B,
6444 (__mmask8) __U);
6447 extern __inline __m128i
6448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6449 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6450 __m128i __B)
6452 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6453 /* idx */ ,
6454 (__v2di) __A,
6455 (__v2di) __B,
6456 (__mmask8)
6457 __U);
6460 extern __inline __m128i
6461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6462 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6464 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6465 /* idx */ ,
6466 (__v4si) __A,
6467 (__v4si) __B,
6468 (__mmask8) -1);
6471 extern __inline __m128i
6472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6473 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6474 __m128i __B)
6476 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6477 /* idx */ ,
6478 (__v4si) __A,
6479 (__v4si) __B,
6480 (__mmask8) __U);
6483 extern __inline __m128i
6484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6485 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6486 __m128i __B)
6488 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6489 (__v4si) __I
6490 /* idx */ ,
6491 (__v4si) __B,
6492 (__mmask8) __U);
6495 extern __inline __m128i
6496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6497 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6498 __m128i __B)
6500 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6501 /* idx */ ,
6502 (__v4si) __A,
6503 (__v4si) __B,
6504 (__mmask8)
6505 __U);
6508 extern __inline __m256i
6509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6510 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6512 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6513 /* idx */ ,
6514 (__v4di) __A,
6515 (__v4di) __B,
6516 (__mmask8) -1);
6519 extern __inline __m256i
6520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6521 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6522 __m256i __B)
6524 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6525 /* idx */ ,
6526 (__v4di) __A,
6527 (__v4di) __B,
6528 (__mmask8) __U);
6531 extern __inline __m256i
6532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6533 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6534 __mmask8 __U, __m256i __B)
6536 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6537 (__v4di) __I
6538 /* idx */ ,
6539 (__v4di) __B,
6540 (__mmask8) __U);
6543 extern __inline __m256i
6544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6545 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6546 __m256i __I, __m256i __B)
6548 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6549 /* idx */ ,
6550 (__v4di) __A,
6551 (__v4di) __B,
6552 (__mmask8)
6553 __U);
6556 extern __inline __m256i
6557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6558 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6560 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6561 /* idx */ ,
6562 (__v8si) __A,
6563 (__v8si) __B,
6564 (__mmask8) -1);
6567 extern __inline __m256i
6568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6569 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6570 __m256i __B)
6572 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6573 /* idx */ ,
6574 (__v8si) __A,
6575 (__v8si) __B,
6576 (__mmask8) __U);
6579 extern __inline __m256i
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6582 __mmask8 __U, __m256i __B)
6584 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6585 (__v8si) __I
6586 /* idx */ ,
6587 (__v8si) __B,
6588 (__mmask8) __U);
6591 extern __inline __m256i
6592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6594 __m256i __I, __m256i __B)
6596 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6597 /* idx */ ,
6598 (__v8si) __A,
6599 (__v8si) __B,
6600 (__mmask8)
6601 __U);
6604 extern __inline __m128d
6605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6608 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6609 /* idx */ ,
6610 (__v2df) __A,
6611 (__v2df) __B,
6612                                                           (__mmask8) -1);
6616 extern __inline __m128d
6617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6618 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6619 __m128d __B)
6621 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6622 /* idx */ ,
6623 (__v2df) __A,
6624 (__v2df) __B,
6625 (__mmask8)
6626 __U);
6629 extern __inline __m128d
6630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6631 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6632 __m128d __B)
6634 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6635 (__v2di) __I
6636 /* idx */ ,
6637 (__v2df) __B,
6638 (__mmask8)
6639 __U);
6642 extern __inline __m128d
6643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6644 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6645 __m128d __B)
6647 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6648 /* idx */ ,
6649 (__v2df) __A,
6650 (__v2df) __B,
6651 (__mmask8)
6652 __U);
6655 extern __inline __m128
6656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6657 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6659 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6660 /* idx */ ,
6661 (__v4sf) __A,
6662 (__v4sf) __B,
6663 (__mmask8) -1);
6666 extern __inline __m128
6667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6668 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6669 __m128 __B)
6671 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6672 /* idx */ ,
6673 (__v4sf) __A,
6674 (__v4sf) __B,
6675 (__mmask8) __U);
6678 extern __inline __m128
6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6681 __m128 __B)
6683 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6684 (__v4si) __I
6685 /* idx */ ,
6686 (__v4sf) __B,
6687 (__mmask8) __U);
6690 extern __inline __m128
6691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6692 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6693 __m128 __B)
6695 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6696 /* idx */ ,
6697 (__v4sf) __A,
6698 (__v4sf) __B,
6699 (__mmask8)
6700 __U);
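/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code built with
   -mavx512vl.  permutex2var selects every destination lane from the
   concatenation of two sources: for the 8 x 32-bit form, bits [2:0]
   of each index choose the lane and bit 3 chooses between A (0) and
   B (1).  */
#if 0
#include <immintrin.h>

static __m256i
interleave_low_halves (__m256i a, __m256i b)
{
  /* Indices 0..7 address lanes of A, 8..15 address lanes of B.  */
  const __m256i idx = _mm256_setr_epi32 (0, 8, 1, 9, 2, 10, 3, 11);
  return _mm256_permutex2var_epi32 (a, idx, b);
}
#endif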
6703 extern __inline __m128i
6704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6707 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6708 (__v2di) __Y,
6709 (__v2di)
6710 _mm_setzero_di (),
6711 (__mmask8) -1);
6714 extern __inline __m128i
6715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6716 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6717 __m128i __Y)
6719 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6720 (__v2di) __Y,
6721 (__v2di) __W,
6722 (__mmask8) __U);
6725 extern __inline __m128i
6726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6727 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6729 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6730 (__v2di) __Y,
6731 (__v2di)
6732 _mm_setzero_di (),
6733 (__mmask8) __U);
6736 extern __inline __m256i
6737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6738 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6739 __m256i __Y)
6741 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6742 (__v8si) __Y,
6743 (__v8si) __W,
6744 (__mmask8) __U);
6747 extern __inline __m256i
6748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6751 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6752 (__v8si) __Y,
6753 (__v8si)
6754 _mm256_setzero_si256 (),
6755 (__mmask8) __U);
6758 extern __inline __m128i
6759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6760 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6761 __m128i __Y)
6763 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6764 (__v4si) __Y,
6765 (__v4si) __W,
6766 (__mmask8) __U);
6769 extern __inline __m128i
6770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6773 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6774 (__v4si) __Y,
6775 (__v4si)
6776 _mm_setzero_si128 (),
6777 (__mmask8) __U);
6780 extern __inline __m256i
6781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6783 __m256i __Y)
6785 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6786 (__v4di) __Y,
6787 (__v4di) __W,
6788 (__mmask8) __U);
6791 extern __inline __m256i
6792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6793 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6795 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6796 (__v4di) __Y,
6797 (__v4di)
6798 _mm256_setzero_si256 (),
6799 (__mmask8) __U);
6802 extern __inline __m128i
6803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6804 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6805 __m128i __Y)
6807 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6808 (__v2di) __Y,
6809 (__v2di) __W,
6810 (__mmask8) __U);
6813 extern __inline __m128i
6814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6817 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6818 (__v2di) __Y,
6819 (__v2di)
6820 _mm_setzero_di (),
6821 (__mmask8) __U);
6824 extern __inline __m256i
6825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6827 __m256i __Y)
6829 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6830 (__v8si) __Y,
6831 (__v8si) __W,
6832 (__mmask8) __U);
6835 extern __inline __m256i
6836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6837 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6839 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6840 (__v8si) __Y,
6841 (__v8si)
6842 _mm256_setzero_si256 (),
6843 (__mmask8) __U);
6846 extern __inline __m128i
6847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6848 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6849 __m128i __Y)
6851 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6852 (__v4si) __Y,
6853 (__v4si) __W,
6854 (__mmask8) __U);
6857 extern __inline __m128i
6858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6859 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6861 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6862 (__v4si) __Y,
6863 (__v4si)
6864 _mm_setzero_si128 (),
6865 (__mmask8) __U);
6868 extern __inline __m256i
6869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6870 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6871 __m256i __Y)
6873 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6874 (__v8si) __Y,
6875 (__v8si) __W,
6876 (__mmask8) __U);
6879 extern __inline __m256i
6880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6881 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6883 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6884 (__v8si) __Y,
6885 (__v8si)
6886 _mm256_setzero_si256 (),
6887 (__mmask8) __U);
6890 extern __inline __m128i
6891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6892 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6893 __m128i __Y)
6895 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6896 (__v4si) __Y,
6897 (__v4si) __W,
6898 (__mmask8) __U);
6901 extern __inline __m128i
6902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6903 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6905 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6906 (__v4si) __Y,
6907 (__v4si)
6908 _mm_setzero_si128 (),
6909 (__mmask8) __U);
6912 extern __inline __m256i
6913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6914 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6915 __m256i __Y)
6917 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6918 (__v4di) __Y,
6919 (__v4di) __W,
6920 (__mmask8) __U);
6923 extern __inline __m256i
6924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6925 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6927 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6928 (__v4di) __Y,
6929 (__v4di)
6930 _mm256_setzero_si256 (),
6931 (__mmask8) __U);
6934 extern __inline __m128i
6935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6936 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6937 __m128i __Y)
6939 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6940 (__v2di) __Y,
6941 (__v2di) __W,
6942 (__mmask8) __U);
6945 extern __inline __m128i
6946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6947 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6949 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6950 (__v2di) __Y,
6951 (__v2di)
6952 _mm_setzero_di (),
6953 (__mmask8) __U);
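/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code built with
   -mavx512vl.  The masked variable-shift forms shift each lane of X
   by the count in the matching lane of Y; with _mask the unselected
   lanes are taken from W, with _maskz they are zeroed.  */
#if 0
#include <immintrin.h>

static __m256i
shift_selected (__m256i x, __m256i counts, __mmask8 m)
{
  /* Lanes selected by M are shifted left by their per-lane count;
     the other lanes keep the original value of X, because the
     merge source W is passed as X itself.  */
  return _mm256_mask_sllv_epi32 (x, m, x, counts);
}
#endif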
6956 extern __inline __m256i
6957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6958 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
6960 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6961 (__v8si) __B,
6962 (__v8si)
6963 _mm256_setzero_si256 (),
6964 (__mmask8) -1);
6967 extern __inline __m256i
6968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6969 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6970 __m256i __B)
6972 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6973 (__v8si) __B,
6974 (__v8si) __W,
6975 (__mmask8) __U);
6978 extern __inline __m256i
6979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6980 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6982 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6983 (__v8si) __B,
6984 (__v8si)
6985 _mm256_setzero_si256 (),
6986 (__mmask8) __U);
6989 extern __inline __m128i
6990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6991 _mm_rolv_epi32 (__m128i __A, __m128i __B)
6993 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6994 (__v4si) __B,
6995 (__v4si)
6996 _mm_setzero_si128 (),
6997 (__mmask8) -1);
7000 extern __inline __m128i
7001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7002 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7003 __m128i __B)
7005 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7006 (__v4si) __B,
7007 (__v4si) __W,
7008 (__mmask8) __U);
7011 extern __inline __m128i
7012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7013 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7015 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7016 (__v4si) __B,
7017 (__v4si)
7018 _mm_setzero_si128 (),
7019 (__mmask8) __U);
7022 extern __inline __m256i
7023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7024 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
7026 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7027 (__v8si) __B,
7028 (__v8si)
7029 _mm256_setzero_si256 (),
7030 (__mmask8) -1);
7033 extern __inline __m256i
7034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7035 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7036 __m256i __B)
7038 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7039 (__v8si) __B,
7040 (__v8si) __W,
7041 (__mmask8) __U);
7044 extern __inline __m256i
7045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7046 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7048 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7049 (__v8si) __B,
7050 (__v8si)
7051 _mm256_setzero_si256 (),
7052 (__mmask8) __U);
7055 extern __inline __m128i
7056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7057 _mm_rorv_epi32 (__m128i __A, __m128i __B)
7059 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7060 (__v4si) __B,
7061 (__v4si)
7062 _mm_setzero_si128 (),
7063 (__mmask8) -1);
7066 extern __inline __m128i
7067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7068 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7069 __m128i __B)
7071 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7072 (__v4si) __B,
7073 (__v4si) __W,
7074 (__mmask8) __U);
7077 extern __inline __m128i
7078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7079 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7081 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7082 (__v4si) __B,
7083 (__v4si)
7084 _mm_setzero_si128 (),
7085 (__mmask8) __U);
7088 extern __inline __m256i
7089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7090 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
7092 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7093 (__v4di) __B,
7094 (__v4di)
7095 _mm256_setzero_si256 (),
7096 (__mmask8) -1);
7099 extern __inline __m256i
7100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7101 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7102 __m256i __B)
7104 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7105 (__v4di) __B,
7106 (__v4di) __W,
7107 (__mmask8) __U);
7110 extern __inline __m256i
7111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7112 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7114 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7115 (__v4di) __B,
7116 (__v4di)
7117 _mm256_setzero_si256 (),
7118 (__mmask8) __U);
7121 extern __inline __m128i
7122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7123 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7125 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7126 (__v2di) __B,
7127 (__v2di)
7128 _mm_setzero_di (),
7129 (__mmask8) -1);
7132 extern __inline __m128i
7133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7134 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7135 __m128i __B)
7137 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7138 (__v2di) __B,
7139 (__v2di) __W,
7140 (__mmask8) __U);
7143 extern __inline __m128i
7144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7145 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7147 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7148 (__v2di) __B,
7149 (__v2di)
7150 _mm_setzero_di (),
7151 (__mmask8) __U);
7154 extern __inline __m256i
7155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7156 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7158 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7159 (__v4di) __B,
7160 (__v4di)
7161 _mm256_setzero_si256 (),
7162 (__mmask8) -1);
7165 extern __inline __m256i
7166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7167 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7168 __m256i __B)
7170 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7171 (__v4di) __B,
7172 (__v4di) __W,
7173 (__mmask8) __U);
7176 extern __inline __m256i
7177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7178 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7180 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7181 (__v4di) __B,
7182 (__v4di)
7183 _mm256_setzero_si256 (),
7184 (__mmask8) __U);
7187 extern __inline __m128i
7188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7189 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7191 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7192 (__v2di) __B,
7193 (__v2di)
7194 _mm_setzero_di (),
7195 (__mmask8) -1);
7198 extern __inline __m128i
7199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7200 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7201 __m128i __B)
7203 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7204 (__v2di) __B,
7205 (__v2di) __W,
7206 (__mmask8) __U);
7209 extern __inline __m128i
7210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7211 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7213 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7214 (__v2di) __B,
7215 (__v2di)
7216 _mm_setzero_di (),
7217 (__mmask8) __U);
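/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code built with
   -mavx512vl.  rolv/rorv rotate each lane by a per-lane count taken
   modulo the lane width, so a left rotation by N is the same as a
   right rotation by (width - N).  */
#if 0
#include <immintrin.h>

static __m256i
rotate_lanes_left_8 (__m256i v)
{
  /* Rotate every 32-bit lane left by 8 bits.  */
  return _mm256_rolv_epi32 (v, _mm256_set1_epi32 (8));
}
#endif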
7220 extern __inline __m256i
7221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7222 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7224 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7225 (__v4di) __Y,
7226 (__v4di)
7227 _mm256_setzero_si256 (),
7228 (__mmask8) -1);
7231 extern __inline __m256i
7232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7233 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7234 __m256i __Y)
7236 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7237 (__v4di) __Y,
7238 (__v4di) __W,
7239 (__mmask8) __U);
7242 extern __inline __m256i
7243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7244 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7246 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7247 (__v4di) __Y,
7248 (__v4di)
7249 _mm256_setzero_si256 (),
7250 (__mmask8) __U);
7253 extern __inline __m256i
7254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7255 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7256 __m256i __B)
7258 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7259 (__v4di) __B,
7260 (__v4di) __W, __U);
7263 extern __inline __m256i
7264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7267 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7268 (__v4di) __B,
7269 (__v4di)
7270 _mm256_setzero_pd (),
7271 __U);
7274 extern __inline __m128i
7275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7276 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7277 __m128i __B)
7279 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7280 (__v2di) __B,
7281 (__v2di) __W, __U);
7284 extern __inline __m128i
7285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7286 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7288 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7289 (__v2di) __B,
7290 (__v2di)
7291 _mm_setzero_pd (),
7292 __U);
7295 extern __inline __m256i
7296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7297 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7298 __m256i __B)
7300 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7301 (__v4di) __B,
7302 (__v4di) __W, __U);
7305 extern __inline __m256i
7306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7307 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7309 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7310 (__v4di) __B,
7311 (__v4di)
7312 _mm256_setzero_pd (),
7313 __U);
7316 extern __inline __m128i
7317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7318 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7319 __m128i __B)
7321 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7322 (__v2di) __B,
7323 (__v2di) __W, __U);
7326 extern __inline __m128i
7327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7328 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7330 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7331 (__v2di) __B,
7332 (__v2di)
7333 _mm_setzero_pd (),
7334 __U);
7337 extern __inline __m256i
7338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7339 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7340 __m256i __B)
7342 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7343 (__v4di) __B,
7344 (__v4di) __W,
7345 (__mmask8) __U);
7348 extern __inline __m256i
7349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7350 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7352 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7353 (__v4di) __B,
7354 (__v4di)
7355 _mm256_setzero_si256 (),
7356 (__mmask8) __U);
7359 extern __inline __m128i
7360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7361 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7363 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7364 (__v2di) __B,
7365 (__v2di) __W,
7366 (__mmask8) __U);
7369 extern __inline __m128i
7370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7371 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7373 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7374 (__v2di) __B,
7375 (__v2di)
7376 _mm_setzero_si128 (),
7377 (__mmask8) __U);
7380 extern __inline __m256i
7381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7382 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7383 __m256i __B)
7385 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7386 (__v4di) __B,
7387 (__v4di) __W,
7388 (__mmask8) __U);
7391 extern __inline __m256i
7392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7393 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7395 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7396 (__v4di) __B,
7397 (__v4di)
7398 _mm256_setzero_si256 (),
7399 (__mmask8) __U);
7402 extern __inline __m128i
7403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7404 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7405 __m128i __B)
7407 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7408 (__v2di) __B,
7409 (__v2di) __W,
7410 (__mmask8) __U);
7413 extern __inline __m128i
7414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7415 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7417 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7418 (__v2di) __B,
7419 (__v2di)
7420 _mm_setzero_si128 (),
7421 (__mmask8) __U);
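/* Illustrative usage sketch, not part of the original header: the
   helper below is hypothetical and assumes user code built with
   -mavx512vl.  The masked logical forms compute the bitwise result
   for every lane and then merge: lanes whose mask bit is clear come
   from W (_mask) or are zeroed (_maskz).  Note that andnot computes
   (~A) & B, so the operand to be inverted goes first.  */
#if 0
#include <immintrin.h>

static __m256i
clear_bits_in_selected_lanes (__m256i acc, __m256i bits, __mmask8 m)
{
  /* For lanes selected by M the result is acc & ~bits; the other
     lanes keep ACC unchanged.  */
  return _mm256_mask_andnot_epi64 (acc, m, bits, acc);
}
#endif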
7424 extern __inline __m256d
7425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7426 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7427 __m256d __B)
7429 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7430 (__v4df) __B,
7431 (__v4df) __W,
7432 (__mmask8) __U);
7435 extern __inline __m256d
7436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7437 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7439 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7440 (__v4df) __B,
7441 (__v4df)
7442 _mm256_setzero_pd (),
7443 (__mmask8) __U);
7446 extern __inline __m256
7447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7448 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7450 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7451 (__v8sf) __B,
7452 (__v8sf) __W,
7453 (__mmask8) __U);
7456 extern __inline __m256
7457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7460 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7461 (__v8sf) __B,
7462 (__v8sf)
7463 _mm256_setzero_ps (),
7464 (__mmask8) __U);
7467 extern __inline __m128
7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7471 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7472 (__v4sf) __B,
7473 (__v4sf) __W,
7474 (__mmask8) __U);
7477 extern __inline __m128
7478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7479 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7481 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7482 (__v4sf) __B,
7483 (__v4sf)
7484 _mm_setzero_ps (),
7485 (__mmask8) __U);
7488 extern __inline __m128d
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7492 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7493 (__v2df) __B,
7494 (__v2df) __W,
7495 (__mmask8) __U);
7498 extern __inline __m128d
7499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7500 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7502 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7503 (__v2df) __B,
7504 (__v2df)
7505 _mm_setzero_pd (),
7506 (__mmask8) __U);
7509 extern __inline __m256d
7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7512 __m256d __B)
7514 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7515 (__v4df) __B,
7516 (__v4df) __W,
7517 (__mmask8) __U);
7520 extern __inline __m256d
7521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7522 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7523 __m256d __B)
7525 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7526 (__v4df) __B,
7527 (__v4df) __W,
7528 (__mmask8) __U);
7531 extern __inline __m256d
7532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7533 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7535 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7536 (__v4df) __B,
7537 (__v4df)
7538 _mm256_setzero_pd (),
7539 (__mmask8) __U);
7542 extern __inline __m256
7543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7544 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7546 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7547 (__v8sf) __B,
7548 (__v8sf) __W,
7549 (__mmask8) __U);
7552 extern __inline __m256d
7553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7554 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7556 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7557 (__v4df) __B,
7558 (__v4df)
7559 _mm256_setzero_pd (),
7560 (__mmask8) __U);
7563 extern __inline __m256
7564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7567 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7568 (__v8sf) __B,
7569 (__v8sf) __W,
7570 (__mmask8) __U);
7573 extern __inline __m256
7574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7575 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7577 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7578 (__v8sf) __B,
7579 (__v8sf)
7580 _mm256_setzero_ps (),
7581 (__mmask8) __U);
7584 extern __inline __m256
7585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7586 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7588 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7589 (__v8sf) __B,
7590 (__v8sf)
7591 _mm256_setzero_ps (),
7592 (__mmask8) __U);
7595 extern __inline __m128
7596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7599 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7600 (__v4sf) __B,
7601 (__v4sf) __W,
7602 (__mmask8) __U);
7605 extern __inline __m128
7606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7609 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7610 (__v4sf) __B,
7611 (__v4sf) __W,
7612 (__mmask8) __U);
7615 extern __inline __m128
7616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7617 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7619 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7620 (__v4sf) __B,
7621 (__v4sf)
7622 _mm_setzero_ps (),
7623 (__mmask8) __U);
7626 extern __inline __m128
7627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7628 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7630 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7631 (__v4sf) __B,
7632 (__v4sf)
7633 _mm_setzero_ps (),
7634 (__mmask8) __U);
7637 extern __inline __m128
7638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7641 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7642 (__v4sf) __B,
7643 (__v4sf) __W,
7644 (__mmask8) __U);
7647 extern __inline __m128
7648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7649 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7651 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7652 (__v4sf) __B,
7653 (__v4sf)
7654 _mm_setzero_ps (),
7655 (__mmask8) __U);
7658 extern __inline __m128d
7659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7662 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7663 (__v2df) __B,
7664 (__v2df) __W,
7665 (__mmask8) __U);
7668 extern __inline __m128d
7669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7670 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7672 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7673 (__v2df) __B,
7674 (__v2df)
7675 _mm_setzero_pd (),
7676 (__mmask8) __U);
7679 extern __inline __m128d
7680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7681 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7683 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7684 (__v2df) __B,
7685 (__v2df) __W,
7686 (__mmask8) __U);
7689 extern __inline __m128d
7690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7691 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7693 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7694 (__v2df) __B,
7695 (__v2df)
7696 _mm_setzero_pd (),
7697 (__mmask8) __U);
7700 extern __inline __m128d
7701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7702 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7704 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7705 (__v2df) __B,
7706 (__v2df) __W,
7707 (__mmask8) __U);
7710 extern __inline __m128d
7711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7712 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7714 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7715 (__v2df) __B,
7716 (__v2df)
7717 _mm_setzero_pd (),
7718 (__mmask8) __U);
7721 extern __inline __m256
7722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7725 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7726 (__v8sf) __B,
7727 (__v8sf) __W,
7728 (__mmask8) __U);
7731 extern __inline __m256
7732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7733 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7735 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7736 (__v8sf) __B,
7737 (__v8sf)
7738 _mm256_setzero_ps (),
7739 (__mmask8) __U);
7742 extern __inline __m256d
7743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7745 __m256d __B)
7747 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7748 (__v4df) __B,
7749 (__v4df) __W,
7750 (__mmask8) __U);
7753 extern __inline __m256d
7754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7757 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7758 (__v4df) __B,
7759 (__v4df)
7760 _mm256_setzero_pd (),
7761 (__mmask8) __U);
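/* Usage sketch (illustrative only; the demo_* names are not part of this
   header).  Assumes a user translation unit that includes <immintrin.h>
   and is compiled with -mavx512vl.  With mask 0x5 (binary 0101) only
   lanes 0 and 2 are multiplied; the merge form keeps lanes 1 and 3 from
   SRC, while the zeroing form sets them to 0.0.  */
static inline __m256d
demo_mask_mul_pd (__m256d src, __m256d a, __m256d b)
{
  return _mm256_mask_mul_pd (src, (__mmask8) 0x5, a, b);
}

static inline __m256d
demo_maskz_mul_pd (__m256d a, __m256d b)
{
  return _mm256_maskz_mul_pd ((__mmask8) 0x5, a, b);
}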
7764 extern __inline __m256i
7765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7766 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7768 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7769 (__v4di) __B,
7770 (__v4di)
7771 _mm256_setzero_si256 (),
7772 __M);
7775 extern __inline __m256i
7776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7777 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7778 __m256i __B)
7780 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7781 (__v4di) __B,
7782 (__v4di) __W, __M);
7785 extern __inline __m256i
7786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787 _mm256_min_epi64 (__m256i __A, __m256i __B)
7789 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7790 (__v4di) __B,
7791 (__v4di)
7792 _mm256_setzero_si256 (),
7793 (__mmask8) -1);
7796 extern __inline __m256i
7797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7798 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7799 __m256i __B)
7801 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7802 (__v4di) __B,
7803 (__v4di) __W, __M);
7806 extern __inline __m256i
7807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7808 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7810 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7811 (__v4di) __B,
7812 (__v4di)
7813 _mm256_setzero_si256 (),
7814 __M);
7817 extern __inline __m256i
7818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7819 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7821 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7822 (__v4di) __B,
7823 (__v4di)
7824 _mm256_setzero_si256 (),
7825 __M);
7828 extern __inline __m256i
7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 _mm256_max_epi64 (__m256i __A, __m256i __B)
7832 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7833 (__v4di) __B,
7834 (__v4di)
7835 _mm256_setzero_si256 (),
7836 (__mmask8) -1);
7839 extern __inline __m256i
7840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841 _mm256_max_epu64 (__m256i __A, __m256i __B)
7843 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7844 (__v4di) __B,
7845 (__v4di)
7846 _mm256_setzero_si256 (),
7847 (__mmask8) -1);
7850 extern __inline __m256i
7851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7853 __m256i __B)
7855 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7856 (__v4di) __B,
7857 (__v4di) __W, __M);
7860 extern __inline __m256i
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm256_min_epu64 (__m256i __A, __m256i __B)
7864 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7865 (__v4di) __B,
7866 (__v4di)
7867 _mm256_setzero_si256 (),
7868 (__mmask8) -1);
7871 extern __inline __m256i
7872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7873 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7874 __m256i __B)
7876 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7877 (__v4di) __B,
7878 (__v4di) __W, __M);
7881 extern __inline __m256i
7882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7883 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7885 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7886 (__v4di) __B,
7887 (__v4di)
7888 _mm256_setzero_si256 (),
7889 __M);
7892 extern __inline __m256i
7893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7896 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7897 (__v8si) __B,
7898 (__v8si)
7899 _mm256_setzero_si256 (),
7900 __M);
7903 extern __inline __m256i
7904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7906 __m256i __B)
7908 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7909 (__v8si) __B,
7910 (__v8si) __W, __M);
7913 extern __inline __m256i
7914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7915 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7917 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7918 (__v8si) __B,
7919 (__v8si)
7920 _mm256_setzero_si256 (),
7921 __M);
7924 extern __inline __m256i
7925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7927 __m256i __B)
7929 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7930 (__v8si) __B,
7931 (__v8si) __W, __M);
7934 extern __inline __m256i
7935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7936 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7938 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7939 (__v8si) __B,
7940 (__v8si)
7941 _mm256_setzero_si256 (),
7942 __M);
7945 extern __inline __m256i
7946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7947 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7948 __m256i __B)
7950 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7951 (__v8si) __B,
7952 (__v8si) __W, __M);
7955 extern __inline __m256i
7956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7957 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7959 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7960 (__v8si) __B,
7961 (__v8si)
7962 _mm256_setzero_si256 (),
7963 __M);
7966 extern __inline __m256i
7967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7968 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7969 __m256i __B)
7971 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7972 (__v8si) __B,
7973 (__v8si) __W, __M);
7976 extern __inline __m128i
7977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7978 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7980 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7981 (__v2di) __B,
7982 (__v2di)
7983 _mm_setzero_si128 (),
7984 __M);
7987 extern __inline __m128i
7988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7989 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7990 __m128i __B)
7992 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7993 (__v2di) __B,
7994 (__v2di) __W, __M);
7997 extern __inline __m128i
7998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7999 _mm_min_epi64 (__m128i __A, __m128i __B)
8001 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8002 (__v2di) __B,
8003 (__v2di)
8004 _mm_setzero_di (),
8005 (__mmask8) -1);
8008 extern __inline __m128i
8009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8011 __m128i __B)
8013 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8014 (__v2di) __B,
8015 (__v2di) __W, __M);
8018 extern __inline __m128i
8019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8020 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8022 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8023 (__v2di) __B,
8024 (__v2di)
8025 _mm_setzero_si128 (),
8026 __M);
8029 extern __inline __m128i
8030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8033 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8034 (__v2di) __B,
8035 (__v2di)
8036 _mm_setzero_si128 (),
8037 __M);
8040 extern __inline __m128i
8041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8042 _mm_max_epi64 (__m128i __A, __m128i __B)
8044 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8045 (__v2di) __B,
8046 (__v2di)
8047 _mm_setzero_di (),
8048 (__mmask8) -1);
8051 extern __inline __m128i
8052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8053 _mm_max_epu64 (__m128i __A, __m128i __B)
8055 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8056 (__v2di) __B,
8057 (__v2di)
8058 _mm_setzero_di (),
8059 (__mmask8) -1);
8062 extern __inline __m128i
8063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8064 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8065 __m128i __B)
8067 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8068 (__v2di) __B,
8069 (__v2di) __W, __M);
8072 extern __inline __m128i
8073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8074 _mm_min_epu64 (__m128i __A, __m128i __B)
8076 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8077 (__v2di) __B,
8078 (__v2di)
8079 _mm_setzero_di (),
8080 (__mmask8) -1);
8083 extern __inline __m128i
8084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8085 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8086 __m128i __B)
8088 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8089 (__v2di) __B,
8090 (__v2di) __W, __M);
8093 extern __inline __m128i
8094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8095 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8097 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8098 (__v2di) __B,
8099 (__v2di)
8100 _mm_setzero_si128 (),
8101 __M);
8104 extern __inline __m128i
8105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8106 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8108 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8109 (__v4si) __B,
8110 (__v4si)
8111 _mm_setzero_si128 (),
8112 __M);
8115 extern __inline __m128i
8116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8118 __m128i __B)
8120 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8121 (__v4si) __B,
8122 (__v4si) __W, __M);
8125 extern __inline __m128i
8126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8129 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8130 (__v4si) __B,
8131 (__v4si)
8132 _mm_setzero_si128 (),
8133 __M);
8136 extern __inline __m128i
8137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8138 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8139 __m128i __B)
8141 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8142 (__v4si) __B,
8143 (__v4si) __W, __M);
8146 extern __inline __m128i
8147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8148 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8150 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8151 (__v4si) __B,
8152 (__v4si)
8153 _mm_setzero_si128 (),
8154 __M);
8157 extern __inline __m128i
8158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8159 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8160 __m128i __B)
8162 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8163 (__v4si) __B,
8164 (__v4si) __W, __M);
8167 extern __inline __m128i
8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8169 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8171 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8172 (__v4si) __B,
8173 (__v4si)
8174 _mm_setzero_si128 (),
8175 __M);
8178 extern __inline __m128i
8179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8181 __m128i __B)
8183 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8184 (__v4si) __B,
8185 (__v4si) __W, __M);
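/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl.  _mm_maskz_max_epi64 takes the
   signed 64-bit maximum per lane and zeroes lanes whose mask bit is
   clear; here only lane 0 is computed, lane 1 becomes zero.  */
static inline __m128i
demo_maskz_max_epi64 (__m128i a, __m128i b)
{
  return _mm_maskz_max_epi64 ((__mmask8) 0x1, a, b);
}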
8188 #ifndef __AVX512CD__
8189 #pragma GCC push_options
8190 #pragma GCC target("avx512vl,avx512cd")
8191 #define __DISABLE_AVX512VLCD__
8192 #endif
8194 extern __inline __m128i
8195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8196 _mm_broadcastmb_epi64 (__mmask8 __A)
8198 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8201 extern __inline __m256i
8202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8203 _mm256_broadcastmb_epi64 (__mmask8 __A)
8205 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8208 extern __inline __m128i
8209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8210 _mm_broadcastmw_epi32 (__mmask16 __A)
8212 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8215 extern __inline __m256i
8216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8217 _mm256_broadcastmw_epi32 (__mmask16 __A)
8219 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
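/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl -mavx512cd.  The broadcastm
   intrinsics materialise a mask register as an integer vector, e.g.
   turning a lane-equality mask into a value visible in every lane.  */
static inline __m256i
demo_broadcast_mask (__m256i a, __m256i b)
{
  __mmask8 __k = _mm256_cmpeq_epi32_mask (a, b);
  return _mm256_broadcastmw_epi32 ((__mmask16) __k);
}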
8222 extern __inline __m256i
8223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8224 _mm256_lzcnt_epi32 (__m256i __A)
8226 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8227 (__v8si)
8228 _mm256_setzero_si256 (),
8229 (__mmask8) -1);
8232 extern __inline __m256i
8233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8234 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8236 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8237 (__v8si) __W,
8238 (__mmask8) __U);
8241 extern __inline __m256i
8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8243 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8245 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8246 (__v8si)
8247 _mm256_setzero_si256 (),
8248 (__mmask8) __U);
8251 extern __inline __m256i
8252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8253 _mm256_lzcnt_epi64 (__m256i __A)
8255 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8256 (__v4di)
8257 _mm256_setzero_si256 (),
8258 (__mmask8) -1);
8261 extern __inline __m256i
8262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8263 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8265 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8266 (__v4di) __W,
8267 (__mmask8) __U);
8270 extern __inline __m256i
8271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8272 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8274 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8275 (__v4di)
8276 _mm256_setzero_si256 (),
8277 (__mmask8) __U);
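/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl -mavx512cd.  Each 32-bit lane of
   _mm256_lzcnt_epi32 is the leading-zero count of the corresponding
   input lane (32 for a zero lane), so 31 - lzcnt gives floor(log2) of
   a non-zero lane.  */
static inline __m256i
demo_floor_log2_epi32 (__m256i v)
{
  return _mm256_sub_epi32 (_mm256_set1_epi32 (31), _mm256_lzcnt_epi32 (v));
}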
8280 extern __inline __m256i
8281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8282 _mm256_conflict_epi64 (__m256i __A)
8284 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8285 (__v4di)
8286 _mm256_setzero_si256 (),
8287 (__mmask8) -1);
8291 extern __inline __m256i
8292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8293 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8295 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8296 (__v4di) __W,
8297 (__mmask8)
8298 __U);
8301 extern __inline __m256i
8302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8303 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8305 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8306 (__v4di)
8307 _mm256_setzero_si256 (),
8308 (__mmask8)
8309 __U);
8312 extern __inline __m256i
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm256_conflict_epi32 (__m256i __A)
8316 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8317 (__v8si)
8318 _mm256_setzero_si256 (),
8319 (__mmask8) -1);
8323 extern __inline __m256i
8324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8325 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8327 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8328 (__v8si) __W,
8329 (__mmask8)
8330 __U);
8333 extern __inline __m256i
8334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8335 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8337 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8338 (__v8si)
8339 _mm256_setzero_si256 (),
8340 (__mmask8)
8341 __U);
8344 extern __inline __m128i
8345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8346 _mm_lzcnt_epi32 (__m128i __A)
8348 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8349 (__v4si)
8350 _mm_setzero_si128 (),
8351 (__mmask8) -1);
8354 extern __inline __m128i
8355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8356 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8358 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8359 (__v4si) __W,
8360 (__mmask8) __U);
8363 extern __inline __m128i
8364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8367 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8368 (__v4si)
8369 _mm_setzero_si128 (),
8370 (__mmask8) __U);
8373 extern __inline __m128i
8374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8375 _mm_lzcnt_epi64 (__m128i __A)
8377 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8378 (__v2di)
8379 _mm_setzero_di (),
8380 (__mmask8) -1);
8383 extern __inline __m128i
8384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8385 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8387 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8388 (__v2di) __W,
8389 (__mmask8) __U);
8392 extern __inline __m128i
8393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8394 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8396 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8397 (__v2di)
8398 _mm_setzero_di (),
8399 (__mmask8) __U);
8402 extern __inline __m128i
8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8404 _mm_conflict_epi64 (__m128i __A)
8406 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8407 (__v2di)
8408 _mm_setzero_di (),
8409 (__mmask8) -1);
8413 extern __inline __m128i
8414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8415 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8417 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8418 (__v2di) __W,
8419 (__mmask8)
8420 __U);
8423 extern __inline __m128i
8424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8425 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8427 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8428 (__v2di)
8429 _mm_setzero_di (),
8430 (__mmask8)
8431 __U);
8434 extern __inline __m128i
8435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8436 _mm_conflict_epi32 (__m128i __A)
8438 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8439 (__v4si)
8440 _mm_setzero_si128 (),
8441 (__mmask8) -1);
8445 extern __inline __m128i
8446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8447 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8449 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8450 (__v4si) __W,
8451 (__mmask8)
8452 __U);
8455 extern __inline __m128i
8456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8457 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8459 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8460 (__v4si)
8461 _mm_setzero_si128 (),
8462 (__mmask8)
8463 __U);
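/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl -mavx512cd.  In lane i of the
   conflict result, bit j is set when lane j (j < i) of the input holds
   the same value, so an all-zero result means every input lane is
   distinct.  */
static inline int
demo_all_lanes_distinct (__m128i v)
{
  __m128i c = _mm_conflict_epi32 (v);
  return _mm_testz_si128 (c, c);	/* SSE4.1: 1 iff c is all zero.  */
}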
8466 #ifdef __DISABLE_AVX512VLCD__
8467 #pragma GCC pop_options
#undef __DISABLE_AVX512VLCD__
8468 #endif
8470 extern __inline __m256d
8471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8472 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8473 __m256d __B)
8475 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8476 (__v4df) __B,
8477 (__v4df) __W,
8478 (__mmask8) __U);
8481 extern __inline __m256d
8482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8483 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8485 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8486 (__v4df) __B,
8487 (__v4df)
8488 _mm256_setzero_pd (),
8489 (__mmask8) __U);
8492 extern __inline __m128d
8493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8494 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8495 __m128d __B)
8497 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8498 (__v2df) __B,
8499 (__v2df) __W,
8500 (__mmask8) __U);
8503 extern __inline __m128d
8504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8505 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8507 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8508 (__v2df) __B,
8509 (__v2df)
8510 _mm_setzero_pd (),
8511 (__mmask8) __U);
8514 extern __inline __m256
8515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8516 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8517 __m256 __B)
8519 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8520 (__v8sf) __B,
8521 (__v8sf) __W,
8522 (__mmask8) __U);
8525 extern __inline __m256d
8526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8527 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8528 __m256d __B)
8530 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8531 (__v4df) __B,
8532 (__v4df) __W,
8533 (__mmask8) __U);
8536 extern __inline __m256d
8537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8538 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8540 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8541 (__v4df) __B,
8542 (__v4df)
8543 _mm256_setzero_pd (),
8544 (__mmask8) __U);
8547 extern __inline __m128d
8548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8549 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8550 __m128d __B)
8552 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8553 (__v2df) __B,
8554 (__v2df) __W,
8555 (__mmask8) __U);
8558 extern __inline __m128d
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8562 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8563 (__v2df) __B,
8564 (__v2df)
8565 _mm_setzero_pd (),
8566 (__mmask8) __U);
8569 extern __inline __m256
8570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8571 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8572 __m256 __B)
8574 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8575 (__v8sf) __B,
8576 (__v8sf) __W,
8577 (__mmask8) __U);
8580 extern __inline __m256
8581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8582 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8584 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8585 (__v8sf) __B,
8586 (__v8sf)
8587 _mm256_setzero_ps (),
8588 (__mmask8) __U);
8591 extern __inline __m128
8592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8593 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8595 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8596 (__v4sf) __B,
8597 (__v4sf) __W,
8598 (__mmask8) __U);
8601 extern __inline __m128
8602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8603 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8605 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8606 (__v4sf) __B,
8607 (__v4sf)
8608 _mm_setzero_ps (),
8609 (__mmask8) __U);
8612 extern __inline __m128
8613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8614 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8616 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8617 (__v4sf) __W,
8618 (__mmask8) __U);
8621 extern __inline __m128
8622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8623 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8625 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8626 (__v4sf)
8627 _mm_setzero_ps (),
8628 (__mmask8) __U);
8631 extern __inline __m256
8632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8633 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8635 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8636 (__v8sf) __B,
8637 (__v8sf)
8638 _mm256_setzero_ps (),
8639 (__mmask8) __U);
8642 extern __inline __m256
8643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8644 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8646 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8647 (__v8sf) __W,
8648 (__mmask8) __U);
8651 extern __inline __m256
8652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8653 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8655 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8656 (__v8sf)
8657 _mm256_setzero_ps (),
8658 (__mmask8) __U);
8661 extern __inline __m128
8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8665 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8666 (__v4sf) __B,
8667 (__v4sf) __W,
8668 (__mmask8) __U);
8671 extern __inline __m128
8672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8675 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8676 (__v4sf) __B,
8677 (__v4sf)
8678 _mm_setzero_ps (),
8679 (__mmask8) __U);
8682 extern __inline __m256i
8683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8684 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8685 __m128i __B)
8687 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8688 (__v4si) __B,
8689 (__v8si) __W,
8690 (__mmask8) __U);
8693 extern __inline __m256i
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8697 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8698 (__v4si) __B,
8699 (__v8si)
8700 _mm256_setzero_si256 (),
8701 (__mmask8) __U);
8704 extern __inline __m128i
8705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8706 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8707 __m128i __B)
8709 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8710 (__v4si) __B,
8711 (__v4si) __W,
8712 (__mmask8) __U);
8715 extern __inline __m128i
8716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8719 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8720 (__v4si) __B,
8721 (__v4si)
8722 _mm_setzero_si128 (),
8723 (__mmask8) __U);
8726 extern __inline __m256i
8727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8728 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8730 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8731 (__v2di) __B,
8732 (__v4di)
8733 _mm256_setzero_si256 (),
8734 (__mmask8) -1);
8737 extern __inline __m256i
8738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8740 __m128i __B)
8742 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8743 (__v2di) __B,
8744 (__v4di) __W,
8745 (__mmask8) __U);
8748 extern __inline __m256i
8749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8750 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8752 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8753 (__v2di) __B,
8754 (__v4di)
8755 _mm256_setzero_si256 (),
8756 (__mmask8) __U);
8759 extern __inline __m128i
8760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8761 _mm_sra_epi64 (__m128i __A, __m128i __B)
8763 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8764 (__v2di) __B,
8765 (__v2di)
8766 _mm_setzero_di (),
8767 (__mmask8) -1);
8770 extern __inline __m128i
8771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8772 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8773 __m128i __B)
8775 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8776 (__v2di) __B,
8777 (__v2di) __W,
8778 (__mmask8) __U);
8781 extern __inline __m128i
8782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8783 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8785 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8786 (__v2di) __B,
8787 (__v2di)
8788 _mm_setzero_di (),
8789 (__mmask8) __U);
8792 extern __inline __m128i
8793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8794 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8795 __m128i __B)
8797 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8798 (__v4si) __B,
8799 (__v4si) __W,
8800 (__mmask8) __U);
8803 extern __inline __m128i
8804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8805 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8807 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8808 (__v4si) __B,
8809 (__v4si)
8810 _mm_setzero_si128 (),
8811 (__mmask8) __U);
8814 extern __inline __m128i
8815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8816 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8817 __m128i __B)
8819 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8820 (__v2di) __B,
8821 (__v2di) __W,
8822 (__mmask8) __U);
8825 extern __inline __m128i
8826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8827 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8829 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8830 (__v2di) __B,
8831 (__v2di)
8832 _mm_setzero_di (),
8833 (__mmask8) __U);
8836 extern __inline __m256i
8837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8839 __m128i __B)
8841 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8842 (__v4si) __B,
8843 (__v8si) __W,
8844 (__mmask8) __U);
8847 extern __inline __m256i
8848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8849 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8851 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8852 (__v4si) __B,
8853 (__v8si)
8854 _mm256_setzero_si256 (),
8855 (__mmask8) __U);
8858 extern __inline __m256i
8859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8860 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8861 __m128i __B)
8863 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8864 (__v2di) __B,
8865 (__v4di) __W,
8866 (__mmask8) __U);
8869 extern __inline __m256i
8870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8871 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8873 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8874 (__v2di) __B,
8875 (__v4di)
8876 _mm256_setzero_si256 (),
8877 (__mmask8) __U);
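/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl.  The sll/sra forms take the
   shift count from the low 64 bits of a __m128i; with mask 0xf0 the
   upper four 32-bit lanes are shifted left by 3 while the lower four
   are copied from W.  */
static inline __m256i
demo_mask_sll_epi32 (__m256i w, __m256i a)
{
  __m128i count = _mm_cvtsi32_si128 (3);
  return _mm256_mask_sll_epi32 (w, (__mmask8) 0xf0, a, count);
}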
8880 extern __inline __m256
8881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8882 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8883 __m256 __Y)
8885 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8886 (__v8si) __X,
8887 (__v8sf) __W,
8888 (__mmask8) __U);
8891 extern __inline __m256
8892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8893 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8895 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8896 (__v8si) __X,
8897 (__v8sf)
8898 _mm256_setzero_ps (),
8899 (__mmask8) __U);
8902 extern __inline __m256d
8903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8904 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8906 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8907 (__v4di) __X,
8908 (__v4df)
8909 _mm256_setzero_pd (),
8910 (__mmask8) -1);
8913 extern __inline __m256d
8914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8915 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8916 __m256d __Y)
8918 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8919 (__v4di) __X,
8920 (__v4df) __W,
8921 (__mmask8) __U);
8924 extern __inline __m256d
8925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8926 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8928 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8929 (__v4di) __X,
8930 (__v4df)
8931 _mm256_setzero_pd (),
8932 (__mmask8) __U);
8935 extern __inline __m256d
8936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8938 __m256i __C)
8940 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8941 (__v4di) __C,
8942 (__v4df) __W,
8943 (__mmask8)
8944 __U);
8947 extern __inline __m256d
8948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8949 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8951 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8952 (__v4di) __C,
8953 (__v4df)
8954 _mm256_setzero_pd (),
8955 (__mmask8)
8956 __U);
8959 extern __inline __m256
8960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8961 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8962 __m256i __C)
8964 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8965 (__v8si) __C,
8966 (__v8sf) __W,
8967 (__mmask8) __U);
8970 extern __inline __m256
8971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8972 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8974 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8975 (__v8si) __C,
8976 (__v8sf)
8977 _mm256_setzero_ps (),
8978 (__mmask8) __U);
8981 extern __inline __m128d
8982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8983 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8984 __m128i __C)
8986 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8987 (__v2di) __C,
8988 (__v2df) __W,
8989 (__mmask8) __U);
8992 extern __inline __m128d
8993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8994 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8996 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8997 (__v2di) __C,
8998 (__v2df)
8999 _mm_setzero_pd (),
9000 (__mmask8) __U);
9003 extern __inline __m128
9004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9005 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9006 __m128i __C)
9008 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9009 (__v4si) __C,
9010 (__v4sf) __W,
9011 (__mmask8) __U);
9014 extern __inline __m128
9015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9016 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9018 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9019 (__v4si) __C,
9020 (__v4sf)
9021 _mm_setzero_ps (),
9022 (__mmask8) __U);
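/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl.  permutexvar gathers elements
   of the last operand using the per-lane indices in the vector operand;
   here the index vector reverses the element order and mask 0x0f keeps
   lanes 4..7 from W.  */
static inline __m256
demo_mask_reverse_ps (__m256 w, __m256 y)
{
  const __m256i idx = _mm256_setr_epi32 (7, 6, 5, 4, 3, 2, 1, 0);
  return _mm256_mask_permutexvar_ps (w, (__mmask8) 0x0f, idx, y);
}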
9025 extern __inline __m256i
9026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9027 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9029 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9030 (__v8si) __B,
9031 (__v8si)
9032 _mm256_setzero_si256 (),
9033 __M);
9036 extern __inline __m256i
9037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9038 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9040 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9041 (__v4di) __X,
9042 (__v4di)
9043 _mm256_setzero_si256 (),
9044 __M);
9047 extern __inline __m256i
9048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9049 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9050 __m256i __B)
9052 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9053 (__v8si) __B,
9054 (__v8si) __W, __M);
9057 extern __inline __m128i
9058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9059 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9061 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9062 (__v4si) __B,
9063 (__v4si)
9064 _mm_setzero_si128 (),
9065 __M);
9068 extern __inline __m128i
9069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070 _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
9071 __m128i __B)
9073 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9074 (__v4si) __B,
9075 (__v4si) __W, __M);
9078 extern __inline __m256i
9079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9081 __m256i __Y)
9083 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9084 (__v8si) __Y,
9085 (__v4di) __W, __M);
9088 extern __inline __m256i
9089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9090 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9092 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9093 (__v8si) __Y,
9094 (__v4di)
9095 _mm256_setzero_si256 (),
9096 __M);
9099 extern __inline __m128i
9100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9101 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9102 __m128i __Y)
9104 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9105 (__v4si) __Y,
9106 (__v2di) __W, __M);
9109 extern __inline __m128i
9110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9111 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9113 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9114 (__v4si) __Y,
9115 (__v2di)
9116 _mm_setzero_si128 (),
9117 __M);
9120 extern __inline __m256i
9121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9122 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9123 __m256i __Y)
9125 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9126 (__v4di) __X,
9127 (__v4di) __W,
9128 __M);
9131 extern __inline __m256i
9132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9133 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9134 __m256i __Y)
9136 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9137 (__v8si) __Y,
9138 (__v4di) __W, __M);
9141 extern __inline __m256i
9142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9143 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9145 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9146 (__v8si) __X,
9147 (__v8si)
9148 _mm256_setzero_si256 (),
9149 __M);
9152 extern __inline __m256i
9153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9154 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9156 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9157 (__v8si) __Y,
9158 (__v4di)
9159 _mm256_setzero_si256 (),
9160 __M);
9163 extern __inline __m128i
9164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9165 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9166 __m128i __Y)
9168 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9169 (__v4si) __Y,
9170 (__v2di) __W, __M);
9173 extern __inline __m128i
9174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9175 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9177 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9178 (__v4si) __Y,
9179 (__v2di)
9180 _mm_setzero_si128 (),
9181 __M);
9184 extern __inline __m256i
9185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9186 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9187 __m256i __Y)
9189 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9190 (__v8si) __X,
9191 (__v8si) __W,
9192 __M);
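/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl.  mul_epi32 multiplies the
   even-indexed 32-bit lanes of X and Y as signed values into four
   64-bit products; mask 0x3 keeps products 0 and 1 and copies the
   other two 64-bit lanes from W.  */
static inline __m256i
demo_mask_widening_mul (__m256i w, __m256i x, __m256i y)
{
  return _mm256_mask_mul_epi32 (w, (__mmask8) 0x3, x, y);
}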
9195 #ifdef __OPTIMIZE__
9196 extern __inline __m256i
9197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9198 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9199 __m256i __X, const int __I)
9201 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9202 __I,
9203 (__v4di) __W,
9204 (__mmask8) __M);
9207 extern __inline __m256i
9208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9209 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9211 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9212 __I,
9213 (__v4di)
9214 _mm256_setzero_si256 (),
9215 (__mmask8) __M);
9218 extern __inline __m256d
9219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9220 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9221 __m256d __B, const int __imm)
9223 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9224 (__v4df) __B, __imm,
9225 (__v4df) __W,
9226 (__mmask8) __U);
9229 extern __inline __m256d
9230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9231 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9232 const int __imm)
9234 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9235 (__v4df) __B, __imm,
9236 (__v4df)
9237 _mm256_setzero_pd (),
9238 (__mmask8) __U);
9241 extern __inline __m128d
9242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9243 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9244 __m128d __B, const int __imm)
9246 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9247 (__v2df) __B, __imm,
9248 (__v2df) __W,
9249 (__mmask8) __U);
9252 extern __inline __m128d
9253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9254 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9255 const int __imm)
9257 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9258 (__v2df) __B, __imm,
9259 (__v2df)
9260 _mm_setzero_pd (),
9261 (__mmask8) __U);
9264 extern __inline __m256
9265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9266 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9267 __m256 __B, const int __imm)
9269 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9270 (__v8sf) __B, __imm,
9271 (__v8sf) __W,
9272 (__mmask8) __U);
9275 extern __inline __m256
9276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9277 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9278 const int __imm)
9280 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9281 (__v8sf) __B, __imm,
9282 (__v8sf)
9283 _mm256_setzero_ps (),
9284 (__mmask8) __U);
9287 extern __inline __m128
9288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9289 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9290 const int __imm)
9292 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9293 (__v4sf) __B, __imm,
9294 (__v4sf) __W,
9295 (__mmask8) __U);
9298 extern __inline __m128
9299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9300 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9301 const int __imm)
9303 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9304 (__v4sf) __B, __imm,
9305 (__v4sf)
9306 _mm_setzero_ps (),
9307 (__mmask8) __U);
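/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h>, -mavx512vl and an integer-constant immediate
   (these forms are only defined under __OPTIMIZE__ here).  Immediate
   0x1b selects a[3], a[2], b[1], b[0]; mask 0x9 writes lanes 0 and 3
   of that shuffle and keeps lanes 1 and 2 from W.  */
static inline __m128
demo_mask_shuffle_ps (__m128 w, __m128 a, __m128 b)
{
  return _mm_mask_shuffle_ps (w, (__mmask8) 0x9, a, b, 0x1b);
}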
9310 extern __inline __m256i
9311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9312 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9314 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9315 (__v4si) __B,
9316 __imm,
9317 (__v8si)
9318 _mm256_setzero_si256 (),
9319 (__mmask8) -1);
9323 extern __inline __m256i
9324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9325 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9326 __m128i __B, const int __imm)
9328 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9329 (__v4si) __B,
9330 __imm,
9331 (__v8si) __W,
9332 (__mmask8)
9333 __U);
9336 extern __inline __m256i
9337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9338 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9339 const int __imm)
9341 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9342 (__v4si) __B,
9343 __imm,
9344 (__v8si)
9345 _mm256_setzero_si256 (),
9346 (__mmask8)
9347 __U);
9350 extern __inline __m256
9351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9352 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9354 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9355 (__v4sf) __B,
9356 __imm,
9357 (__v8sf)
9358 _mm256_setzero_ps (),
9359 (__mmask8) -1);
9362 extern __inline __m256
9363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9365 __m128 __B, const int __imm)
9367 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9368 (__v4sf) __B,
9369 __imm,
9370 (__v8sf) __W,
9371 (__mmask8) __U);
9374 extern __inline __m256
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9377 const int __imm)
9379 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9380 (__v4sf) __B,
9381 __imm,
9382 (__v8sf)
9383 _mm256_setzero_ps (),
9384 (__mmask8) __U);
9387 extern __inline __m128i
9388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9389 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9391 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9392 __imm,
9393 (__v4si)
9394 _mm_setzero_si128 (),
9395 (__mmask8) -1);
9399 extern __inline __m128i
9400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9402 const int __imm)
9404 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9405 __imm,
9406 (__v4si) __W,
9407 (__mmask8)
9408 __U);
9411 extern __inline __m128i
9412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9414 const int __imm)
9416 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9417 __imm,
9418 (__v4si)
9419 _mm_setzero_si128 (),
9420 (__mmask8)
9421 __U);
9424 extern __inline __m128
9425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9426 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
9428 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9429 __imm,
9430 (__v4sf)
9431 _mm_setzero_ps (),
9432 (__mmask8) -1);
9436 extern __inline __m128
9437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9439 const int __imm)
9441 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9442 __imm,
9443 (__v4sf) __W,
9444 (__mmask8)
9445 __U);
9448 extern __inline __m128
9449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9451 const int __imm)
9453 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9454 __imm,
9455 (__v4sf)
9456 _mm_setzero_ps (),
9457 (__mmask8)
9458 __U);
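/* Usage sketch (illustrative only; demo_* is not part of this header).
   Assumes <immintrin.h> and -mavx512vl.  Immediate 1 selects the upper
   128-bit half (float lanes 4..7) of the 256-bit source.  */
static inline __m128
demo_extract_high_ps (__m256 v)
{
  return _mm256_extractf32x4_ps (v, 1);
}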
9461 extern __inline __m256i
9462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9463 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9465 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9466 (__v4di) __B,
9467 __imm,
9468 (__v4di)
9469 _mm256_setzero_si256 (),
9470 (__mmask8) -1);
9473 extern __inline __m256i
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9476 __m256i __B, const int __imm)
9478 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9479 (__v4di) __B,
9480 __imm,
9481 (__v4di) __W,
9482 (__mmask8) __U);
9485 extern __inline __m256i
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9488 const int __imm)
9490 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9491 (__v4di) __B,
9492 __imm,
9493 (__v4di)
9494 _mm256_setzero_si256 (),
9495 (__mmask8) __U);
9498 extern __inline __m256i
9499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9500 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9502 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9503 (__v8si) __B,
9504 __imm,
9505 (__v8si)
9506 _mm256_setzero_si256 (),
9507 (__mmask8) -1);
9510 extern __inline __m256i
9511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9513 __m256i __B, const int __imm)
9515 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9516 (__v8si) __B,
9517 __imm,
9518 (__v8si) __W,
9519 (__mmask8) __U);
9522 extern __inline __m256i
9523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9525 const int __imm)
9527 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9528 (__v8si) __B,
9529 __imm,
9530 (__v8si)
9531 _mm256_setzero_si256 (),
9532 (__mmask8) __U);
9535 extern __inline __m256d
9536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9537 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9539 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9540 (__v4df) __B,
9541 __imm,
9542 (__v4df)
9543 _mm256_setzero_pd (),
9544 (__mmask8) -1);
9547 extern __inline __m256d
9548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9549 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9550 __m256d __B, const int __imm)
9552 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9553 (__v4df) __B,
9554 __imm,
9555 (__v4df) __W,
9556 (__mmask8) __U);
9559 extern __inline __m256d
9560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9562 const int __imm)
9564 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9565 (__v4df) __B,
9566 __imm,
9567 (__v4df)
9568 _mm256_setzero_pd (),
9569 (__mmask8) __U);
9572 extern __inline __m256
9573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9574 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9576 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9577 (__v8sf) __B,
9578 __imm,
9579 (__v8sf)
9580 _mm256_setzero_ps (),
9581 (__mmask8) -1);
9584 extern __inline __m256
9585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9587 __m256 __B, const int __imm)
9589 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9590 (__v8sf) __B,
9591 __imm,
9592 (__v8sf) __W,
9593 (__mmask8) __U);
9596 extern __inline __m256
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9599 const int __imm)
9601 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9602 (__v8sf) __B,
9603 __imm,
9604 (__v8sf)
9605 _mm256_setzero_ps (),
9606 (__mmask8) __U);
9609 extern __inline __m256d
9610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9611 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9612 const int __imm)
9614 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9615 (__v4df) __B,
9616 (__v4di) __C,
9617 __imm,
9618 (__mmask8) -1);
9621 extern __inline __m256d
9622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9623 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9624 __m256i __C, const int __imm)
9626 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9627 (__v4df) __B,
9628 (__v4di) __C,
9629 __imm,
9630 (__mmask8) __U);
9633 extern __inline __m256d
9634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9635 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9636 __m256i __C, const int __imm)
9638 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9639 (__v4df) __B,
9640 (__v4di) __C,
9641 __imm,
9642 (__mmask8) __U);
9645 extern __inline __m256
9646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9647 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9648 const int __imm)
9650 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9651 (__v8sf) __B,
9652 (__v8si) __C,
9653 __imm,
9654 (__mmask8) -1);
9657 extern __inline __m256
9658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9659 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9660 __m256i __C, const int __imm)
9662 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9663 (__v8sf) __B,
9664 (__v8si) __C,
9665 __imm,
9666 (__mmask8) __U);
9669 extern __inline __m256
9670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9671 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9672 __m256i __C, const int __imm)
9674 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9675 (__v8sf) __B,
9676 (__v8si) __C,
9677 __imm,
9678 (__mmask8) __U);
9681 extern __inline __m128d
9682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9683 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9684 const int __imm)
9686 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9687 (__v2df) __B,
9688 (__v2di) __C,
9689 __imm,
9690 (__mmask8) -1);
9693 extern __inline __m128d
9694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9695 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9696 __m128i __C, const int __imm)
9698 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9699 (__v2df) __B,
9700 (__v2di) __C,
9701 __imm,
9702 (__mmask8) __U);
9705 extern __inline __m128d
9706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9707 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9708 __m128i __C, const int __imm)
9710 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9711 (__v2df) __B,
9712 (__v2di) __C,
9713 __imm,
9714 (__mmask8) __U);
9717 extern __inline __m128
9718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9719 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9721 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9722 (__v4sf) __B,
9723 (__v4si) __C,
9724 __imm,
9725 (__mmask8) -1);
9728 extern __inline __m128
9729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9730 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9731 __m128i __C, const int __imm)
9733 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9734 (__v4sf) __B,
9735 (__v4si) __C,
9736 __imm,
9737 (__mmask8) __U);
9740 extern __inline __m128
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9743 __m128i __C, const int __imm)
9745 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9746 (__v4sf) __B,
9747 (__v4si) __C,
9748 __imm,
9749 (__mmask8) __U);
9752 extern __inline __m256i
9753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9754 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9755 const int __imm)
9757 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9758 (__v8si) __W,
9759 (__mmask8) __U);
9762 extern __inline __m256i
9763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9764 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9766 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9767 (__v8si)
9768 _mm256_setzero_si256 (),
9769 (__mmask8) __U);
9772 extern __inline __m128i
9773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9775 const int __imm)
9777 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9778 (__v4si) __W,
9779 (__mmask8) __U);
9782 extern __inline __m128i
9783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9784 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9786 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9787 (__v4si)
9788 _mm_setzero_si128 (),
9789 (__mmask8) __U);
9792 extern __inline __m256i
9793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9794 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9795 const int __imm)
9797 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9798 (__v4di) __W,
9799 (__mmask8) __U);
9802 extern __inline __m256i
9803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9804 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9806 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9807 (__v4di)
9808 _mm256_setzero_si256 (),
9809 (__mmask8) __U);
9812 extern __inline __m128i
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9815 const int __imm)
9817 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9818 (__v2di) __W,
9819 (__mmask8) __U);
9822 extern __inline __m128i
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9826 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9827 (__v2di)
9828 _mm_setzero_si128 (),
9829 (__mmask8) __U);
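/* Editorial note (illustrative, not part of the original header):
   each shift intrinsic above comes in a merge-masking (_mask_) and a
   zero-masking (_maskz_) form.  A minimal sketch of the difference,
   assuming a program that includes <immintrin.h> and is built with
   -mavx512vl:

     __m256i v   = _mm256_set1_epi32 (256);
     __mmask8 k  = 0x0F;                  // select lanes 0-3 only
     __m256i old = _mm256_set1_epi32 (-1);

     // lanes 0-3 hold 256 >> 4 == 16, lanes 4-7 keep -1 from 'old'
     __m256i merged = _mm256_mask_srli_epi32 (old, k, v, 4);

     // lanes 0-3 hold 16, lanes 4-7 are zeroed
     __m256i zeroed = _mm256_maskz_srli_epi32 (k, v, 4);  */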
9832 extern __inline __m256i
9833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9834 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9835 const int __imm)
9837 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9838 (__v4di) __B,
9839 (__v4di) __C, __imm,
9840 (__mmask8) -1);
9843 extern __inline __m256i
9844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9846 __m256i __B, __m256i __C,
9847 const int __imm)
9849 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9850 (__v4di) __B,
9851 (__v4di) __C, __imm,
9852 (__mmask8) __U);
9855 extern __inline __m256i
9856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9857 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9858 __m256i __B, __m256i __C,
9859 const int __imm)
9861 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9862 (__v4di) __B,
9863 (__v4di) __C,
9864 __imm,
9865 (__mmask8) __U);
9868 extern __inline __m256i
9869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9871 const int __imm)
9873 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9874 (__v8si) __B,
9875 (__v8si) __C, __imm,
9876 (__mmask8) -1);
9879 extern __inline __m256i
9880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9882 __m256i __B, __m256i __C,
9883 const int __imm)
9885 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9886 (__v8si) __B,
9887 (__v8si) __C, __imm,
9888 (__mmask8) __U);
9891 extern __inline __m256i
9892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9893 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9894 __m256i __B, __m256i __C,
9895 const int __imm)
9897 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9898 (__v8si) __B,
9899 (__v8si) __C,
9900 __imm,
9901 (__mmask8) __U);
9904 extern __inline __m128i
9905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9906 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9907 const int __imm)
9909 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9910 (__v2di) __B,
9911 (__v2di) __C, __imm,
9912 (__mmask8) -1);
9915 extern __inline __m128i
9916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9918 __m128i __B, __m128i __C, const int __imm)
9920 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9921 (__v2di) __B,
9922 (__v2di) __C, __imm,
9923 (__mmask8) __U);
9926 extern __inline __m128i
9927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9928 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9929 __m128i __B, __m128i __C, const int __imm)
9931 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9932 (__v2di) __B,
9933 (__v2di) __C,
9934 __imm,
9935 (__mmask8) __U);
9938 extern __inline __m128i
9939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9940 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9941 const int __imm)
9943 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9944 (__v4si) __B,
9945 (__v4si) __C, __imm,
9946 (__mmask8) -1);
9949 extern __inline __m128i
9950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9951 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9952 __m128i __B, __m128i __C, const int __imm)
9954 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9955 (__v4si) __B,
9956 (__v4si) __C, __imm,
9957 (__mmask8) __U);
9960 extern __inline __m128i
9961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9962 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9963 __m128i __B, __m128i __C, const int __imm)
9965 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9966 (__v4si) __B,
9967 (__v4si) __C,
9968 __imm,
9969 (__mmask8) __U);
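/* Editorial note (illustrative, not part of the original header):
   for the vpternlog intrinsics above, bit i of the immediate is the
   value of the boolean function for the input combination in which
   bits 2, 1 and 0 of i are taken from __A, __B and __C respectively.
   A ^ B ^ C is true for i = 1, 2, 4 and 7, so its immediate is 0x96.
   A minimal sketch:

     __m256i a = _mm256_set1_epi32 (0x0F0F0F0F);
     __m256i b = _mm256_set1_epi32 (0x00FF00FF);
     __m256i c = _mm256_set1_epi32 (0x0000FFFF);

     // one instruction computes a ^ b ^ c across every bit
     __m256i x = _mm256_ternarylogic_epi32 (a, b, c, 0x96);  */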
9972 extern __inline __m256
9973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9974 _mm256_roundscale_ps (__m256 __A, const int __imm)
9976 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9977 __imm,
9978 (__v8sf)
9979 _mm256_setzero_ps (),
9980 (__mmask8) -1);
9983 extern __inline __m256
9984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9985 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9986 const int __imm)
9988 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9989 __imm,
9990 (__v8sf) __W,
9991 (__mmask8) __U);
9994 extern __inline __m256
9995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9996 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9998 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9999 __imm,
10000 (__v8sf)
10001 _mm256_setzero_ps (),
10002 (__mmask8) __U);
10005 extern __inline __m256d
10006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10007 _mm256_roundscale_pd (__m256d __A, const int __imm)
10009 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10010 __imm,
10011 (__v4df)
10012 _mm256_setzero_pd (),
10013 (__mmask8) -1);
10016 extern __inline __m256d
10017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10018 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10019 const int __imm)
10021 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10022 __imm,
10023 (__v4df) __W,
10024 (__mmask8) __U);
10027 extern __inline __m256d
10028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10029 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10031 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10032 __imm,
10033 (__v4df)
10034 _mm256_setzero_pd (),
10035 (__mmask8) __U);
10038 extern __inline __m128
10039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10040 _mm_roundscale_ps (__m128 __A, const int __imm)
10042 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10043 __imm,
10044 (__v4sf)
10045 _mm_setzero_ps (),
10046 (__mmask8) -1);
10049 extern __inline __m128
10050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10051 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10052 const int __imm)
10054 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10055 __imm,
10056 (__v4sf) __W,
10057 (__mmask8) __U);
10060 extern __inline __m128
10061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10062 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10064 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10065 __imm,
10066 (__v4sf)
10067 _mm_setzero_ps (),
10068 (__mmask8) __U);
10071 extern __inline __m128d
10072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10073 _mm_roundscale_pd (__m128d __A, const int __imm)
10075 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10076 __imm,
10077 (__v2df)
10078 _mm_setzero_pd (),
10079 (__mmask8) -1);
10082 extern __inline __m128d
10083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10085 const int __imm)
10087 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10088 __imm,
10089 (__v2df) __W,
10090 (__mmask8) __U);
10093 extern __inline __m128d
10094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10095 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10097 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10098 __imm,
10099 (__v2df)
10100 _mm_setzero_pd (),
10101 (__mmask8) __U);
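/* Editorial note (illustrative, not part of the original header):
   for the vrndscale intrinsics above, the low two bits of the
   immediate select the rounding direction (0 nearest, 1 down, 2 up,
   3 toward zero) and bits 7:4 give the number M of fraction bits to
   keep, so values are rounded to multiples of 2^-M.  A hedged sketch
   of a vectorised floor (M = 0, round down):

     __m256 v = _mm256_set1_ps (2.75f);
     __m256 f = _mm256_roundscale_ps (v, 0x01);  // every lane is 2.0f  */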
10104 extern __inline __m256
10105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10106 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10107 _MM_MANTISSA_SIGN_ENUM __C)
10109 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10110 (__C << 2) | __B,
10111 (__v8sf)
10112 _mm256_setzero_ps (),
10113 (__mmask8) -1);
10116 extern __inline __m256
10117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10118 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10119 _MM_MANTISSA_NORM_ENUM __B,
10120 _MM_MANTISSA_SIGN_ENUM __C)
10122 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10123 (__C << 2) | __B,
10124 (__v8sf) __W,
10125 (__mmask8) __U);
10128 extern __inline __m256
10129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10130 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10131 _MM_MANTISSA_NORM_ENUM __B,
10132 _MM_MANTISSA_SIGN_ENUM __C)
10134 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10135 (__C << 2) | __B,
10136 (__v8sf)
10137 _mm256_setzero_ps (),
10138 (__mmask8) __U);
10141 extern __inline __m128
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10144 _MM_MANTISSA_SIGN_ENUM __C)
10146 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10147 (__C << 2) | __B,
10148 (__v4sf)
10149 _mm_setzero_ps (),
10150 (__mmask8) -1);
10153 extern __inline __m128
10154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10155 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10156 _MM_MANTISSA_NORM_ENUM __B,
10157 _MM_MANTISSA_SIGN_ENUM __C)
10159 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10160 (__C << 2) | __B,
10161 (__v4sf) __W,
10162 (__mmask8) __U);
10165 extern __inline __m128
10166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10167 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10168 _MM_MANTISSA_NORM_ENUM __B,
10169 _MM_MANTISSA_SIGN_ENUM __C)
10171 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10172 (__C << 2) | __B,
10173 (__v4sf)
10174 _mm_setzero_ps (),
10175 (__mmask8) __U);
10178 extern __inline __m256d
10179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10180 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10181 _MM_MANTISSA_SIGN_ENUM __C)
10183 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10184 (__C << 2) | __B,
10185 (__v4df)
10186 _mm256_setzero_pd (),
10187 (__mmask8) -1);
10190 extern __inline __m256d
10191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10192 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10193 _MM_MANTISSA_NORM_ENUM __B,
10194 _MM_MANTISSA_SIGN_ENUM __C)
10196 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10197 (__C << 2) | __B,
10198 (__v4df) __W,
10199 (__mmask8) __U);
10202 extern __inline __m256d
10203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10205 _MM_MANTISSA_NORM_ENUM __B,
10206 _MM_MANTISSA_SIGN_ENUM __C)
10208 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10209 (__C << 2) | __B,
10210 (__v4df)
10211 _mm256_setzero_pd (),
10212 (__mmask8) __U);
10215 extern __inline __m128d
10216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10217 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10218 _MM_MANTISSA_SIGN_ENUM __C)
10220 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10221 (__C << 2) | __B,
10222 (__v2df)
10223 _mm_setzero_pd (),
10224 (__mmask8) -1);
10227 extern __inline __m128d
10228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10230 _MM_MANTISSA_NORM_ENUM __B,
10231 _MM_MANTISSA_SIGN_ENUM __C)
10233 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10234 (__C << 2) | __B,
10235 (__v2df) __W,
10236 (__mmask8) __U);
10239 extern __inline __m128d
10240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10242 _MM_MANTISSA_NORM_ENUM __B,
10243 _MM_MANTISSA_SIGN_ENUM __C)
10245 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10246 (__C << 2) | __B,
10247 (__v2df)
10248 _mm_setzero_pd (),
10249 (__mmask8) __U);
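/* Editorial note (illustrative, not part of the original header):
   the getmant intrinsics above extract each element's mantissa,
   normalised to the interval named by the _MM_MANTISSA_NORM_ENUM
   argument and with the sign handled as named by the
   _MM_MANTISSA_SIGN_ENUM argument; the builtin packs the two as
   (__C << 2) | __B.  A hedged sketch, assuming the enumerators
   _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_src provided elsewhere by
   <immintrin.h>:

     __m256d v = _mm256_set1_pd (-24.0);
     // -24 == -1.5 * 2^4, so every lane becomes -1.5
     __m256d m = _mm256_getmant_pd (v, _MM_MANT_NORM_1_2,
                                    _MM_MANT_SIGN_src);  */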
10252 extern __inline __m256
10253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10254 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10255 __m256i __index, float const *__addr,
10256 int __scale)
10258 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10259 __addr,
10260 (__v8si) __index,
10261 __mask, __scale);
10264 extern __inline __m128
10265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10266 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10267 __m128i __index, float const *__addr,
10268 int __scale)
10270 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10271 __addr,
10272 (__v4si) __index,
10273 __mask, __scale);
10276 extern __inline __m256d
10277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10278 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10279 __m128i __index, double const *__addr,
10280 int __scale)
10282 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10283 __addr,
10284 (__v4si) __index,
10285 __mask, __scale);
10288 extern __inline __m128d
10289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10290 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10291 __m128i __index, double const *__addr,
10292 int __scale)
10294 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10295 __addr,
10296 (__v4si) __index,
10297 __mask, __scale);
10300 extern __inline __m128
10301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10302 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10303 __m256i __index, float const *__addr,
10304 int __scale)
10306 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10307 __addr,
10308 (__v4di) __index,
10309 __mask, __scale);
10312 extern __inline __m128
10313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10314 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10315 __m128i __index, float const *__addr,
10316 int __scale)
10318 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10319 __addr,
10320 (__v2di) __index,
10321 __mask, __scale);
10324 extern __inline __m256d
10325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10326 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10327 __m256i __index, double const *__addr,
10328 int __scale)
10330 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10331 __addr,
10332 (__v4di) __index,
10333 __mask, __scale);
10336 extern __inline __m128d
10337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10339 __m128i __index, double const *__addr,
10340 int __scale)
10342 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10343 __addr,
10344 (__v2di) __index,
10345 __mask, __scale);
10348 extern __inline __m256i
10349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10350 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10351 __m256i __index, int const *__addr,
10352 int __scale)
10354 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10355 __addr,
10356 (__v8si) __index,
10357 __mask, __scale);
10360 extern __inline __m128i
10361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10362 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10363 __m128i __index, int const *__addr,
10364 int __scale)
10366 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10367 __addr,
10368 (__v4si) __index,
10369 __mask, __scale);
10372 extern __inline __m256i
10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10375 __m128i __index, long long const *__addr,
10376 int __scale)
10378 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10379 __addr,
10380 (__v4si) __index,
10381 __mask, __scale);
10384 extern __inline __m128i
10385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10386 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10387 __m128i __index, long long const *__addr,
10388 int __scale)
10390 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10391 __addr,
10392 (__v4si) __index,
10393 __mask, __scale);
10396 extern __inline __m128i
10397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10399 __m256i __index, int const *__addr,
10400 int __scale)
10402 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10403 __addr,
10404 (__v4di) __index,
10405 __mask, __scale);
10408 extern __inline __m128i
10409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10410 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10411 __m128i __index, int const *__addr,
10412 int __scale)
10414 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10415 __addr,
10416 (__v2di) __index,
10417 __mask, __scale);
10420 extern __inline __m256i
10421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10422 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10423 __m256i __index, long long const *__addr,
10424 int __scale)
10426 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10427 __addr,
10428 (__v4di) __index,
10429 __mask, __scale);
10432 extern __inline __m128i
10433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10434 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10435 __m128i __index, long long const *__addr,
10436 int __scale)
10438 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10439 __addr,
10440 (__v2di) __index,
10441 __mask, __scale);
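/* Editorial note (illustrative, not part of the original header):
   unlike the AVX2 gathers, the gathers above take a __mmask8 and
   merge the unselected lanes from __v1_old.  A minimal sketch,
   assuming 'table' is initialised elsewhere:

     float table[64];
     __m256i idx = _mm256_set_epi32 (7, 6, 5, 4, 3, 2, 1, 0);
     __mmask8 k  = 0x3F;                    // load lanes 0-5 only
     __m256 old  = _mm256_setzero_ps ();

     // lanes 0-5 receive table[idx[i]], lanes 6-7 stay 0.0f
     __m256 v = _mm256_mmask_i32gather_ps (old, k, idx, table, 4);  */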
10444 extern __inline void
10445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10446 _mm256_i32scatter_ps (float *__addr, __m256i __index,
10447 __m256 __v1, const int __scale)
10449 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10450 (__v8si) __index, (__v8sf) __v1,
10451 __scale);
10454 extern __inline void
10455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10456 _mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10457 __m256i __index, __m256 __v1,
10458 const int __scale)
10460 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10461 (__v8sf) __v1, __scale);
10464 extern __inline void
10465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10466 _mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10467 const int __scale)
10469 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10470 (__v4si) __index, (__v4sf) __v1,
10471 __scale);
10474 extern __inline void
10475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10476 _mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10477 __m128i __index, __m128 __v1,
10478 const int __scale)
10480 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10481 (__v4sf) __v1, __scale);
10484 extern __inline void
10485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10486 _mm256_i32scatter_pd (double *__addr, __m128i __index,
10487 __m256d __v1, const int __scale)
10489 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10490 (__v4si) __index, (__v4df) __v1,
10491 __scale);
10494 extern __inline void
10495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10496 _mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10497 __m128i __index, __m256d __v1,
10498 const int __scale)
10500 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10501 (__v4df) __v1, __scale);
10504 extern __inline void
10505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10506 _mm_i32scatter_pd (double *__addr, __m128i __index,
10507 __m128d __v1, const int __scale)
10509 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10510 (__v4si) __index, (__v2df) __v1,
10511 __scale);
10514 extern __inline void
10515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10516 _mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10517 __m128i __index, __m128d __v1,
10518 const int __scale)
10520 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10521 (__v2df) __v1, __scale);
10524 extern __inline void
10525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10526 _mm256_i64scatter_ps (float *__addr, __m256i __index,
10527 __m128 __v1, const int __scale)
10529 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10530 (__v4di) __index, (__v4sf) __v1,
10531 __scale);
10534 extern __inline void
10535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10536 _mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10537 __m256i __index, __m128 __v1,
10538 const int __scale)
10540 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10541 (__v4sf) __v1, __scale);
10544 extern __inline void
10545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10546 _mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10547 const int __scale)
10549 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10550 (__v2di) __index, (__v4sf) __v1,
10551 __scale);
10554 extern __inline void
10555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10556 _mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10557 __m128i __index, __m128 __v1,
10558 const int __scale)
10560 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10561 (__v4sf) __v1, __scale);
10564 extern __inline void
10565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10566 _mm256_i64scatter_pd (double *__addr, __m256i __index,
10567 __m256d __v1, const int __scale)
10569 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10570 (__v4di) __index, (__v4df) __v1,
10571 __scale);
10574 extern __inline void
10575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10576 _mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10577 __m256i __index, __m256d __v1,
10578 const int __scale)
10580 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10581 (__v4df) __v1, __scale);
10584 extern __inline void
10585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10586 _mm_i64scatter_pd (double *__addr, __m128i __index,
10587 __m128d __v1, const int __scale)
10589 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10590 (__v2di) __index, (__v2df) __v1,
10591 __scale);
10594 extern __inline void
10595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10596 _mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10597 __m128i __index, __m128d __v1,
10598 const int __scale)
10600 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10601 (__v2df) __v1, __scale);
10604 extern __inline void
10605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10606 _mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10607 __m256i __v1, const int __scale)
10609 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10610 (__v8si) __index, (__v8si) __v1,
10611 __scale);
10614 extern __inline void
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10617 __m256i __index, __m256i __v1,
10618 const int __scale)
10620 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10621 (__v8si) __v1, __scale);
10624 extern __inline void
10625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626 _mm_i32scatter_epi32 (int *__addr, __m128i __index,
10627 __m128i __v1, const int __scale)
10629 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10630 (__v4si) __index, (__v4si) __v1,
10631 __scale);
10634 extern __inline void
10635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10636 _mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10637 __m128i __index, __m128i __v1,
10638 const int __scale)
10640 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10641 (__v4si) __v1, __scale);
10644 extern __inline void
10645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10646 _mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10647 __m256i __v1, const int __scale)
10649 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10650 (__v4si) __index, (__v4di) __v1,
10651 __scale);
10654 extern __inline void
10655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10656 _mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10657 __m128i __index, __m256i __v1,
10658 const int __scale)
10660 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10661 (__v4di) __v1, __scale);
10664 extern __inline void
10665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10666 _mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10667 __m128i __v1, const int __scale)
10669 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10670 (__v4si) __index, (__v2di) __v1,
10671 __scale);
10674 extern __inline void
10675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10676 _mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10677 __m128i __index, __m128i __v1,
10678 const int __scale)
10680 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10681 (__v2di) __v1, __scale);
10684 extern __inline void
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10687 __m128i __v1, const int __scale)
10689 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10690 (__v4di) __index, (__v4si) __v1,
10691 __scale);
10694 extern __inline void
10695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10696 _mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10697 __m256i __index, __m128i __v1,
10698 const int __scale)
10700 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10701 (__v4si) __v1, __scale);
10704 extern __inline void
10705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10706 _mm_i64scatter_epi32 (int *__addr, __m128i __index,
10707 __m128i __v1, const int __scale)
10709 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10710 (__v2di) __index, (__v4si) __v1,
10711 __scale);
10714 extern __inline void
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10717 __m128i __index, __m128i __v1,
10718 const int __scale)
10720 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10721 (__v4si) __v1, __scale);
10724 extern __inline void
10725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10726 _mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10727 __m256i __v1, const int __scale)
10729 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10730 (__v4di) __index, (__v4di) __v1,
10731 __scale);
10734 extern __inline void
10735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736 _mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10737 __m256i __index, __m256i __v1,
10738 const int __scale)
10740 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10741 (__v4di) __v1, __scale);
10744 extern __inline void
10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746 _mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10747 __m128i __v1, const int __scale)
10749 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10750 (__v2di) __index, (__v2di) __v1,
10751 __scale);
10754 extern __inline void
10755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10756 _mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10757 __m128i __index, __m128i __v1,
10758 const int __scale)
10760 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10761 (__v2di) __v1, __scale);
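/* Editorial note (illustrative, not part of the original header):
   the scatter intrinsics above store element i of __v1 at byte
   offset __index[i] * __scale from __addr; the unmasked forms simply
   pass an all-ones mask (0xFF) to the builtin.  A minimal sketch:

     double out[16] = { 0 };
     __m128i idx = _mm_set_epi32 (0, 0, 9, 3);  // low two indices used
     __m128d v   = _mm_set_pd (2.5, 1.5);

     // writes out[3] = 1.5 and out[9] = 2.5
     _mm_i32scatter_pd (out, idx, v, 8);  */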
10764 extern __inline __m256i
10765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10766 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10767 _MM_PERM_ENUM __mask)
10769 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10770 (__v8si) __W,
10771 (__mmask8) __U);
10774 extern __inline __m256i
10775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10777 _MM_PERM_ENUM __mask)
10779 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10780 (__v8si)
10781 _mm256_setzero_si256 (),
10782 (__mmask8) __U);
10785 extern __inline __m128i
10786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10788 _MM_PERM_ENUM __mask)
10790 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10791 (__v4si) __W,
10792 (__mmask8) __U);
10795 extern __inline __m128i
10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10798 _MM_PERM_ENUM __mask)
10800 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10801 (__v4si)
10802 _mm_setzero_si128 (),
10803 (__mmask8) __U);
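/* Editorial note (illustrative, not part of the original header):
   the masked pshufd intrinsics above take their selector as the
   _MM_PERM_ENUM from the AVX-512F header, and the permutation is
   applied within each 128-bit lane.  A hedged sketch, assuming
   _MM_PERM_AAAA (selector 0x00) replicates element 0 of each lane:

     __m256i v = _mm256_set_epi32 (8, 7, 6, 5, 4, 3, 2, 1);

     // low-lane elements all become 1, high-lane elements all 5
     __m256i b = _mm256_maskz_shuffle_epi32 (0xFF, v, _MM_PERM_AAAA);  */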
10806 extern __inline __m256i
10807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10808 _mm256_rol_epi32 (__m256i __A, const int __B)
10810 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10811 (__v8si)
10812 _mm256_setzero_si256 (),
10813 (__mmask8) -1);
10816 extern __inline __m256i
10817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10819 const int __B)
10821 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10822 (__v8si) __W,
10823 (__mmask8) __U);
10826 extern __inline __m256i
10827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10828 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10830 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10831 (__v8si)
10832 _mm256_setzero_si256 (),
10833 (__mmask8) __U);
10836 extern __inline __m128i
10837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10838 _mm_rol_epi32 (__m128i __A, const int __B)
10840 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10841 (__v4si)
10842 _mm_setzero_si128 (),
10843 (__mmask8) -1);
10846 extern __inline __m128i
10847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10848 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10849 const int __B)
10851 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10852 (__v4si) __W,
10853 (__mmask8) __U);
10856 extern __inline __m128i
10857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10858 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10860 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10861 (__v4si)
10862 _mm_setzero_si128 (),
10863 (__mmask8) __U);
10866 extern __inline __m256i
10867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10868 _mm256_ror_epi32 (__m256i __A, const int __B)
10870 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10871 (__v8si)
10872 _mm256_setzero_si256 (),
10873 (__mmask8) -1);
10876 extern __inline __m256i
10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10878 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10879 const int __B)
10881 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10882 (__v8si) __W,
10883 (__mmask8) __U);
10886 extern __inline __m256i
10887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10890 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10891 (__v8si)
10892 _mm256_setzero_si256 (),
10893 (__mmask8) __U);
10896 extern __inline __m128i
10897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10898 _mm_ror_epi32 (__m128i __A, const int __B)
10900 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10901 (__v4si)
10902 _mm_setzero_si128 (),
10903 (__mmask8) -1);
10906 extern __inline __m128i
10907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10909 const int __B)
10911 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10912 (__v4si) __W,
10913 (__mmask8) __U);
10916 extern __inline __m128i
10917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10918 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10920 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10921 (__v4si)
10922 _mm_setzero_si128 (),
10923 (__mmask8) __U);
10926 extern __inline __m256i
10927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10928 _mm256_rol_epi64 (__m256i __A, const int __B)
10930 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10931 (__v4di)
10932 _mm256_setzero_si256 (),
10933 (__mmask8) -1);
10936 extern __inline __m256i
10937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10938 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10939 const int __B)
10941 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10942 (__v4di) __W,
10943 (__mmask8) __U);
10946 extern __inline __m256i
10947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10948 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10950 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10951 (__v4di)
10952 _mm256_setzero_si256 (),
10953 (__mmask8) __U);
10956 extern __inline __m128i
10957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10958 _mm_rol_epi64 (__m128i __A, const int __B)
10960 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10961 (__v2di)
10962 _mm_setzero_di (),
10963 (__mmask8) -1);
10966 extern __inline __m128i
10967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10968 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10969 const int __B)
10971 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10972 (__v2di) __W,
10973 (__mmask8) __U);
10976 extern __inline __m128i
10977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10978 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10980 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10981 (__v2di)
10982 _mm_setzero_di (),
10983 (__mmask8) __U);
10986 extern __inline __m256i
10987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10988 _mm256_ror_epi64 (__m256i __A, const int __B)
10990 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10991 (__v4di)
10992 _mm256_setzero_si256 (),
10993 (__mmask8) -1);
10996 extern __inline __m256i
10997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10998 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10999 const int __B)
11001 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11002 (__v4di) __W,
11003 (__mmask8) __U);
11006 extern __inline __m256i
11007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11008 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
11010 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11011 (__v4di)
11012 _mm256_setzero_si256 (),
11013 (__mmask8) __U);
11016 extern __inline __m128i
11017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11018 _mm_ror_epi64 (__m128i __A, const int __B)
11020 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11021 (__v2di)
11022 _mm_setzero_di (),
11023 (__mmask8) -1);
11026 extern __inline __m128i
11027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11028 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11029 const int __B)
11031 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11032 (__v2di) __W,
11033 (__mmask8) __U);
11036 extern __inline __m128i
11037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11038 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11040 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11041 (__v2di)
11042 _mm_setzero_di (),
11043 (__mmask8) __U);
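/* Editorial note (illustrative, not part of the original header):
   vprold/vprord rotate each element by the immediate, which AVX2 had
   to emulate with two shifts and an OR.  A minimal sketch:

     __m256i v = _mm256_set1_epi32 ((int) 0x80000001);

     // rotate left by one bit: every lane becomes 0x00000003
     __m256i r = _mm256_rol_epi32 (v, 1);  */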
11046 extern __inline __m128i
11047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11048 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11050 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11051 (__v4si) __B, __imm,
11052 (__v4si)
11053 _mm_setzero_si128 (),
11054 (__mmask8) -1);
11057 extern __inline __m128i
11058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11059 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11060 __m128i __B, const int __imm)
11062 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11063 (__v4si) __B, __imm,
11064 (__v4si) __W,
11065 (__mmask8) __U);
11068 extern __inline __m128i
11069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11070 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11071 const int __imm)
11073 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11074 (__v4si) __B, __imm,
11075 (__v4si)
11076 _mm_setzero_si128 (),
11077 (__mmask8) __U);
11080 extern __inline __m128i
11081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11084 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11085 (__v2di) __B, __imm,
11086 (__v2di)
11087 _mm_setzero_di (),
11088 (__mmask8) -1);
11091 extern __inline __m128i
11092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11093 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11094 __m128i __B, const int __imm)
11096 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11097 (__v2di) __B, __imm,
11098 (__v2di) __W,
11099 (__mmask8) __U);
11102 extern __inline __m128i
11103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11104 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11105 const int __imm)
11107 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11108 (__v2di) __B, __imm,
11109 (__v2di)
11110 _mm_setzero_di (),
11111 (__mmask8) __U);
11114 extern __inline __m256i
11115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11116 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11118 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11119 (__v8si) __B, __imm,
11120 (__v8si)
11121 _mm256_setzero_si256 (),
11122 (__mmask8) -1);
11125 extern __inline __m256i
11126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11127 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11128 __m256i __B, const int __imm)
11130 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11131 (__v8si) __B, __imm,
11132 (__v8si) __W,
11133 (__mmask8) __U);
11136 extern __inline __m256i
11137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11138 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11139 const int __imm)
11141 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11142 (__v8si) __B, __imm,
11143 (__v8si)
11144 _mm256_setzero_si256 (),
11145 (__mmask8) __U);
11148 extern __inline __m256i
11149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11150 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11152 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11153 (__v4di) __B, __imm,
11154 (__v4di)
11155 _mm256_setzero_si256 (),
11156 (__mmask8) -1);
11159 extern __inline __m256i
11160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11161 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11162 __m256i __B, const int __imm)
11164 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11165 (__v4di) __B, __imm,
11166 (__v4di) __W,
11167 (__mmask8) __U);
11170 extern __inline __m256i
11171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11172 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11173 const int __imm)
11175 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11176 (__v4di) __B, __imm,
11177 (__v4di)
11178 _mm256_setzero_si256 (),
11179 (__mmask8) __U);
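/* Editorial note (illustrative, not part of the original header):
   valignd/valignq concatenate __A (upper half) with __B (lower
   half), shift the whole 2N-element vector right by __imm elements
   and keep the low N; unlike palignr it counts in elements, not
   bytes, and works across the full register rather than per 128-bit
   lane.  A minimal sketch:

     __m256i a = _mm256_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8);
     __m256i b = _mm256_set_epi32 ( 7,  6,  5,  4,  3,  2, 1, 0);

     // result lanes are { 2, 3, 4, 5, 6, 7, 8, 9 }
     __m256i r = _mm256_alignr_epi32 (a, b, 2);  */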
11182 extern __inline __m128i
11183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11184 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11185 const int __I)
11187 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11188 (__v8hi) __W,
11189 (__mmask8) __U);
11192 extern __inline __m128i
11193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11194 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11196 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11197 (__v8hi)
11198 _mm_setzero_hi (),
11199 (__mmask8) __U);
11202 extern __inline __m128i
11203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11204 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11205 const int __I)
11207 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11208 (__v8hi) __W,
11209 (__mmask8) __U);
11212 extern __inline __m128i
11213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11214 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11216 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11217 (__v8hi)
11218 _mm_setzero_hi (),
11219 (__mmask8) __U);
11222 extern __inline __m256i
11223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11224 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11225 const int __imm)
11227 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11228 (__v8si) __W,
11229 (__mmask8) __U);
11232 extern __inline __m256i
11233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11234 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11236 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11237 (__v8si)
11238 _mm256_setzero_si256 (),
11239 (__mmask8) __U);
11242 extern __inline __m128i
11243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11244 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11245 const int __imm)
11247 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11248 (__v4si) __W,
11249 (__mmask8) __U);
11252 extern __inline __m128i
11253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11254 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11256 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11257 (__v4si)
11258 _mm_setzero_si128 (),
11259 (__mmask8) __U);
11262 extern __inline __m256i
11263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11264 _mm256_srai_epi64 (__m256i __A, const int __imm)
11266 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11267 (__v4di)
11268 _mm256_setzero_si256 (),
11269 (__mmask8) -1);
11272 extern __inline __m256i
11273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11274 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11275 const int __imm)
11277 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11278 (__v4di) __W,
11279 (__mmask8) __U);
11282 extern __inline __m256i
11283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11284 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11286 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11287 (__v4di)
11288 _mm256_setzero_si256 (),
11289 (__mmask8) __U);
11292 extern __inline __m128i
11293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11294 _mm_srai_epi64 (__m128i __A, const int __imm)
11296 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11297 (__v2di)
11298 _mm_setzero_di (),
11299 (__mmask8) -1);
11302 extern __inline __m128i
11303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11304 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11305 const int __imm)
11307 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11308 (__v2di) __W,
11309 (__mmask8) __U);
11312 extern __inline __m128i
11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11316 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11317 (__v2di)
11318 _mm_setzero_si128 (),
11319 (__mmask8) __U);
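/* Editorial note (illustrative, not part of the original header):
   a 64-bit arithmetic right shift (vpsraq) has no SSE/AVX2
   counterpart; the intrinsics above sign-extend, so negative lanes
   stay negative.  A minimal sketch:

     __m256i v = _mm256_set1_epi64x (-256);

     // every lane becomes -16
     __m256i s = _mm256_srai_epi64 (v, 4);  */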
11322 extern __inline __m128i
11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11326 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11327 (__v4si) __W,
11328 (__mmask8) __U);
11331 extern __inline __m128i
11332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11333 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11335 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11336 (__v4si)
11337 _mm_setzero_si128 (),
11338 (__mmask8) __U);
11341 extern __inline __m128i
11342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11343 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11345 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11346 (__v2di) __W,
11347 (__mmask8) __U);
11350 extern __inline __m128i
11351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11352 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11354 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11355 (__v2di)
11356 _mm_setzero_di (),
11357 (__mmask8) __U);
11360 extern __inline __m256i
11361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11362 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11363 int __B)
11365 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11366 (__v8si) __W,
11367 (__mmask8) __U);
11370 extern __inline __m256i
11371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11372 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11374 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11375 (__v8si)
11376 _mm256_setzero_si256 (),
11377 (__mmask8) __U);
11380 extern __inline __m256i
11381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11382 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11383 int __B)
11385 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11386 (__v4di) __W,
11387 (__mmask8) __U);
11390 extern __inline __m256i
11391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11392 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11394 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11395 (__v4di)
11396 _mm256_setzero_si256 (),
11397 (__mmask8) __U);
11400 extern __inline __m256d
11401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11402 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11403 const int __imm)
11405 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11406 (__v4df) __W,
11407 (__mmask8) __U);
11410 extern __inline __m256d
11411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11412 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11414 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11415 (__v4df)
11416 _mm256_setzero_pd (),
11417 (__mmask8) __U);
11420 extern __inline __m256d
11421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11423 const int __C)
11425 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11426 (__v4df) __W,
11427 (__mmask8) __U);
11430 extern __inline __m256d
11431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11432 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11434 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11435 (__v4df)
11436 _mm256_setzero_pd (),
11437 (__mmask8) __U);
11440 extern __inline __m128d
11441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11442 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11443 const int __C)
11445 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11446 (__v2df) __W,
11447 (__mmask8) __U);
11450 extern __inline __m128d
11451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11454 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11455 (__v2df)
11456 _mm_setzero_pd (),
11457 (__mmask8) __U);
11460 extern __inline __m256
11461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11462 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11463 const int __C)
11465 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11466 (__v8sf) __W,
11467 (__mmask8) __U);
11470 extern __inline __m256
11471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11474 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11475 (__v8sf)
11476 _mm256_setzero_ps (),
11477 (__mmask8) __U);
11480 extern __inline __m128
11481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11482 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11483 const int __C)
11485 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11486 (__v4sf) __W,
11487 (__mmask8) __U);
11490 extern __inline __m128
11491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11492 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11494 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11495 (__v4sf)
11496 _mm_setzero_ps (),
11497 (__mmask8) __U);
11500 extern __inline __m256d
11501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11502 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11504 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11505 (__v4df) __W,
11506 (__mmask8) __U);
11509 extern __inline __m256
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11513 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11514 (__v8sf) __W,
11515 (__mmask8) __U);
11518 extern __inline __m256i
11519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11520 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11522 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11523 (__v4di) __W,
11524 (__mmask8) __U);
11527 extern __inline __m256i
11528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11529 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11531 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11532 (__v8si) __W,
11533 (__mmask8) __U);
11536 extern __inline __m128d
11537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11538 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11540 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11541 (__v2df) __W,
11542 (__mmask8) __U);
11545 extern __inline __m128
11546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11547 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11549 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11550 (__v4sf) __W,
11551 (__mmask8) __U);
11554 extern __inline __m128i
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11558 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11559 (__v2di) __W,
11560 (__mmask8) __U);
11563 extern __inline __m128i
11564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11567 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11568 (__v4si) __W,
11569 (__mmask8) __U);
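/* Note (added): general compare intrinsics.  __P selects the predicate (one
   of the _MM_CMPINT_* constants for the integer forms, one of the _CMP_*
   constants for the ps/pd forms); the result is a bit mask with one bit per
   element, and the _mask_ variants additionally AND it with __U.  */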
11572 extern __inline __mmask8
11573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11574 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11576 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11577 (__v4di) __Y, __P,
11578 (__mmask8) -1);
11581 extern __inline __mmask8
11582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11583 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11585 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11586 (__v8si) __Y, __P,
11587 (__mmask8) -1);
11590 extern __inline __mmask8
11591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11592 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11594 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11595 (__v4di) __Y, __P,
11596 (__mmask8) -1);
11599 extern __inline __mmask8
11600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11601 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11603 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11604 (__v8si) __Y, __P,
11605 (__mmask8) -1);
11608 extern __inline __mmask8
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11612 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11613 (__v4df) __Y, __P,
11614 (__mmask8) -1);
11617 extern __inline __mmask8
11618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11619 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11621 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11622 (__v8sf) __Y, __P,
11623 (__mmask8) -1);
11626 extern __inline __mmask8
11627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11628 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11629 const int __P)
11631 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11632 (__v4di) __Y, __P,
11633 (__mmask8) __U);
11636 extern __inline __mmask8
11637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11638 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11639 const int __P)
11641 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11642 (__v8si) __Y, __P,
11643 (__mmask8) __U);
11646 extern __inline __mmask8
11647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11648 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11649 const int __P)
11651 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11652 (__v4di) __Y, __P,
11653 (__mmask8) __U);
11656 extern __inline __mmask8
11657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11658 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11659 const int __P)
11661 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11662 (__v8si) __Y, __P,
11663 (__mmask8) __U);
11666 extern __inline __mmask8
11667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11668 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11669 const int __P)
11671 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11672 (__v4df) __Y, __P,
11673 (__mmask8) __U);
11676 extern __inline __mmask8
11677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11678 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11679 const int __P)
11681 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11682 (__v8sf) __Y, __P,
11683 (__mmask8) __U);
11686 extern __inline __mmask8
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11690 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11691 (__v2di) __Y, __P,
11692 (__mmask8) -1);
11695 extern __inline __mmask8
11696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11697 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11699 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11700 (__v4si) __Y, __P,
11701 (__mmask8) -1);
11704 extern __inline __mmask8
11705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11706 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11708 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11709 (__v2di) __Y, __P,
11710 (__mmask8) -1);
11713 extern __inline __mmask8
11714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11715 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11717 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11718 (__v4si) __Y, __P,
11719 (__mmask8) -1);
11722 extern __inline __mmask8
11723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11724 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11726 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11727 (__v2df) __Y, __P,
11728 (__mmask8) -1);
11731 extern __inline __mmask8
11732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11733 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11735 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11736 (__v4sf) __Y, __P,
11737 (__mmask8) -1);
11740 extern __inline __mmask8
11741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11742 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11743 const int __P)
11745 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11746 (__v2di) __Y, __P,
11747 (__mmask8) __U);
11750 extern __inline __mmask8
11751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11752 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11753 const int __P)
11755 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11756 (__v4si) __Y, __P,
11757 (__mmask8) __U);
11760 extern __inline __mmask8
11761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11762 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11763 const int __P)
11765 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11766 (__v2di) __Y, __P,
11767 (__mmask8) __U);
11770 extern __inline __mmask8
11771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11772 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11773 const int __P)
11775 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11776 (__v4si) __Y, __P,
11777 (__mmask8) __U);
11780 extern __inline __mmask8
11781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11782 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11783 const int __P)
11785 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11786 (__v2df) __Y, __P,
11787 (__mmask8) __U);
11790 extern __inline __mmask8
11791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11792 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11793 const int __P)
11795 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11796 (__v4sf) __Y, __P,
11797 (__mmask8) __U);
11800 extern __inline __m256d
11801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11802 _mm256_permutex_pd (__m256d __X, const int __M)
11804 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11805 (__v4df)
11806 _mm256_undefined_pd (),
11807 (__mmask8) -1);
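/* Note (added): the convenience comparisons below hard-code the predicate
   immediate: 4 (_MM_CMPINT_NE), 1 (_MM_CMPINT_LT), 5 (_MM_CMPINT_NLT,
   i.e. >=) and 2 (_MM_CMPINT_LE), matching the cmpneq/cmplt/cmpge/cmple
   names.  */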
11810 extern __inline __mmask8
11811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11812 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11814 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11815 (__v8si) __Y, 4,
11816 (__mmask8) __M);
11819 extern __inline __mmask8
11820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11821 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11823 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11824 (__v8si) __Y, 4,
11825 (__mmask8) - 1);
11828 extern __inline __mmask8
11829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11830 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11832 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11833 (__v8si) __Y, 1,
11834 (__mmask8) __M);
11837 extern __inline __mmask8
11838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11839 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11841 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11842 (__v8si) __Y, 1,
11843 (__mmask8) - 1);
11846 extern __inline __mmask8
11847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11848 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11850 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11851 (__v8si) __Y, 5,
11852 (__mmask8) __M);
11855 extern __inline __mmask8
11856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11857 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11859 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11860 (__v8si) __Y, 5,
11861 (__mmask8) - 1);
11864 extern __inline __mmask8
11865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11866 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11868 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11869 (__v8si) __Y, 2,
11870 (__mmask8) __M);
11873 extern __inline __mmask8
11874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11875 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11877 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11878 (__v8si) __Y, 2,
11879 (__mmask8) - 1);
11882 extern __inline __mmask8
11883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11886 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11887 (__v4di) __Y, 4,
11888 (__mmask8) __M);
11891 extern __inline __mmask8
11892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11893 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11895 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11896 (__v4di) __Y, 4,
11897 (__mmask8) - 1);
11900 extern __inline __mmask8
11901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11902 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11904 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11905 (__v4di) __Y, 1,
11906 (__mmask8) __M);
11909 extern __inline __mmask8
11910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11911 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11913 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11914 (__v4di) __Y, 1,
11915 (__mmask8) - 1);
11918 extern __inline __mmask8
11919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11922 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11923 (__v4di) __Y, 5,
11924 (__mmask8) __M);
11927 extern __inline __mmask8
11928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11929 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11931 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11932 (__v4di) __Y, 5,
11933 (__mmask8) - 1);
11936 extern __inline __mmask8
11937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11938 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11940 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11941 (__v4di) __Y, 2,
11942 (__mmask8) __M);
11945 extern __inline __mmask8
11946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11947 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11949 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11950 (__v4di) __Y, 2,
11951 (__mmask8) - 1);
11954 extern __inline __mmask8
11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11958 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11959 (__v8si) __Y, 4,
11960 (__mmask8) __M);
11963 extern __inline __mmask8
11964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11965 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11967 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11968 (__v8si) __Y, 4,
11969 (__mmask8) - 1);
11972 extern __inline __mmask8
11973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11974 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11976 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11977 (__v8si) __Y, 1,
11978 (__mmask8) __M);
11981 extern __inline __mmask8
11982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11983 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11985 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11986 (__v8si) __Y, 1,
11987 (__mmask8) - 1);
11990 extern __inline __mmask8
11991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11992 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11994 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11995 (__v8si) __Y, 5,
11996 (__mmask8) __M);
11999 extern __inline __mmask8
12000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12001 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
12003 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12004 (__v8si) __Y, 5,
12005 (__mmask8) - 1);
12008 extern __inline __mmask8
12009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12012 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12013 (__v8si) __Y, 2,
12014 (__mmask8) __M);
12017 extern __inline __mmask8
12018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12019 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
12021 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12022 (__v8si) __Y, 2,
12023 (__mmask8) - 1);
12026 extern __inline __mmask8
12027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12028 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12030 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12031 (__v4di) __Y, 4,
12032 (__mmask8) __M);
12035 extern __inline __mmask8
12036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12037 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
12039 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12040 (__v4di) __Y, 4,
12041 (__mmask8) - 1);
12044 extern __inline __mmask8
12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12048 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12049 (__v4di) __Y, 1,
12050 (__mmask8) __M);
12053 extern __inline __mmask8
12054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12055 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
12057 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12058 (__v4di) __Y, 1,
12059 (__mmask8) - 1);
12062 extern __inline __mmask8
12063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12066 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12067 (__v4di) __Y, 5,
12068 (__mmask8) __M);
12071 extern __inline __mmask8
12072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
12075 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12076 (__v4di) __Y, 5,
12077 (__mmask8) - 1);
12080 extern __inline __mmask8
12081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12082 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12084 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12085 (__v4di) __Y, 2,
12086 (__mmask8) __M);
12089 extern __inline __mmask8
12090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12091 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
12093 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12094 (__v4di) __Y, 2,
12095 (__mmask8) - 1);
12098 extern __inline __mmask8
12099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12100 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12102 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12103 (__v4si) __Y, 4,
12104 (__mmask8) __M);
12107 extern __inline __mmask8
12108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
12111 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12112 (__v4si) __Y, 4,
12113 (__mmask8) - 1);
12116 extern __inline __mmask8
12117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12118 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12120 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12121 (__v4si) __Y, 1,
12122 (__mmask8) __M);
12125 extern __inline __mmask8
12126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12127 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
12129 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12130 (__v4si) __Y, 1,
12131 (__mmask8) - 1);
12134 extern __inline __mmask8
12135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12136 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12138 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12139 (__v4si) __Y, 5,
12140 (__mmask8) __M);
12143 extern __inline __mmask8
12144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
12147 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12148 (__v4si) __Y, 5,
12149 (__mmask8) - 1);
12152 extern __inline __mmask8
12153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154 _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12156 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12157 (__v4si) __Y, 2,
12158 (__mmask8) __M);
12161 extern __inline __mmask8
12162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12163 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
12165 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12166 (__v4si) __Y, 2,
12167 (__mmask8) - 1);
12170 extern __inline __mmask8
12171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12172 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12174 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12175 (__v2di) __Y, 4,
12176 (__mmask8) __M);
12179 extern __inline __mmask8
12180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12181 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
12183 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12184 (__v2di) __Y, 4,
12185 (__mmask8) - 1);
12188 extern __inline __mmask8
12189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12192 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12193 (__v2di) __Y, 1,
12194 (__mmask8) __M);
12197 extern __inline __mmask8
12198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
12201 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12202 (__v2di) __Y, 1,
12203 (__mmask8) - 1);
12206 extern __inline __mmask8
12207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12210 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12211 (__v2di) __Y, 5,
12212 (__mmask8) __M);
12215 extern __inline __mmask8
12216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
12219 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12220 (__v2di) __Y, 5,
12221 (__mmask8) - 1);
12224 extern __inline __mmask8
12225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12226 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12228 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12229 (__v2di) __Y, 2,
12230 (__mmask8) __M);
12233 extern __inline __mmask8
12234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
12237 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12238 (__v2di) __Y, 2,
12239 (__mmask8) - 1);
12242 extern __inline __mmask8
12243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12246 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12247 (__v4si) __Y, 4,
12248 (__mmask8) __M);
12251 extern __inline __mmask8
12252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
12255 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12256 (__v4si) __Y, 4,
12257 (__mmask8) - 1);
12260 extern __inline __mmask8
12261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12262 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12264 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12265 (__v4si) __Y, 1,
12266 (__mmask8) __M);
12269 extern __inline __mmask8
12270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12271 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
12273 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12274 (__v4si) __Y, 1,
12275 (__mmask8) - 1);
12278 extern __inline __mmask8
12279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12282 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12283 (__v4si) __Y, 5,
12284 (__mmask8) __M);
12287 extern __inline __mmask8
12288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12289 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
12291 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12292 (__v4si) __Y, 5,
12293 (__mmask8) - 1);
12296 extern __inline __mmask8
12297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12298 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12300 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12301 (__v4si) __Y, 2,
12302 (__mmask8) __M);
12305 extern __inline __mmask8
12306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12307 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
12309 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12310 (__v4si) __Y, 2,
12311 (__mmask8) - 1);
12314 extern __inline __mmask8
12315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12316 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12318 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12319 (__v2di) __Y, 4,
12320 (__mmask8) __M);
12323 extern __inline __mmask8
12324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12325 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
12327 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12328 (__v2di) __Y, 4,
12329 (__mmask8) - 1);
12332 extern __inline __mmask8
12333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12336 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12337 (__v2di) __Y, 1,
12338 (__mmask8) __M);
12341 extern __inline __mmask8
12342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12343 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
12345 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12346 (__v2di) __Y, 1,
12347 (__mmask8) - 1);
12350 extern __inline __mmask8
12351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12352 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12354 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12355 (__v2di) __Y, 5,
12356 (__mmask8) __M);
12359 extern __inline __mmask8
12360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12361 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
12363 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12364 (__v2di) __Y, 5,
12365 (__mmask8) - 1);
12368 extern __inline __mmask8
12369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12370 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12372 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12373 (__v2di) __Y, 2,
12374 (__mmask8) __M);
12377 extern __inline __mmask8
12378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12379 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
12381 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12382 (__v2di) __Y, 2,
12383 (__mmask8) - 1);
12386 #else
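/* Note (added): macro forms of the immediate-argument intrinsics, used when
   __OPTIMIZE__ is not defined; without optimization the inline functions
   above cannot guarantee that the immediate reaches the builtin as a
   compile-time constant.  */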
12387 #define _mm256_permutex_pd(X, M) \
12388 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
12389 (__v4df)(__m256d)_mm256_undefined_pd(),\
12390 (__mmask8)-1))
12392 #define _mm256_maskz_permutex_epi64(M, X, I) \
12393 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12394 (int)(I), \
12395 (__v4di)(__m256i) \
12396 (_mm256_setzero_si256()),\
12397 (__mmask8)(M)))
12399 #define _mm256_mask_permutex_epi64(W, M, X, I) \
12400 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12401 (int)(I), \
12402 (__v4di)(__m256i)(W), \
12403 (__mmask8)(M)))
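/* Note (added): the insertf32x4/inserti32x4 macros replace the 128-bit lane
   of X selected by C with Y; the extractf32x4/extracti32x4 macros further
   below return the selected 128-bit lane.  */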
12405 #define _mm256_insertf32x4(X, Y, C) \
12406 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12407 (__v4sf)(__m128) (Y), (int) (C), \
12408 (__v8sf)(__m256)_mm256_setzero_ps(), \
12409 (__mmask8)-1))
12411 #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
12412 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12413 (__v4sf)(__m128) (Y), (int) (C), \
12414 (__v8sf)(__m256)(W), \
12415 (__mmask8)(U)))
12417 #define _mm256_maskz_insertf32x4(U, X, Y, C) \
12418 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12419 (__v4sf)(__m128) (Y), (int) (C), \
12420 (__v8sf)(__m256)_mm256_setzero_ps(), \
12421 (__mmask8)(U)))
12423 #define _mm256_inserti32x4(X, Y, C) \
12424 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12425 (__v4si)(__m128i) (Y), (int) (C), \
12426 (__v8si)(__m256i)_mm256_setzero_si256(), \
12427 (__mmask8)-1))
12429 #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
12430 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12431 (__v4si)(__m128i) (Y), (int) (C), \
12432 (__v8si)(__m256i)(W), \
12433 (__mmask8)(U)))
12435 #define _mm256_maskz_inserti32x4(U, X, Y, C) \
12436 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12437 (__v4si)(__m128i) (Y), (int) (C), \
12438 (__v8si)(__m256i)_mm256_setzero_si256(), \
12439 (__mmask8)(U)))
12441 #define _mm256_extractf32x4_ps(X, C) \
12442 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12443 (int) (C), \
12444 (__v4sf)(__m128)_mm_setzero_ps(), \
12445 (__mmask8)-1))
12447 #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
12448 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12449 (int) (C), \
12450 (__v4sf)(__m128)(W), \
12451 (__mmask8)(U)))
12453 #define _mm256_maskz_extractf32x4_ps(U, X, C) \
12454 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12455 (int) (C), \
12456 (__v4sf)(__m128)_mm_setzero_ps(), \
12457 (__mmask8)(U)))
12459 #define _mm256_extracti32x4_epi32(X, C) \
12460 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12461 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
12463 #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
12464 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12465 (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12467 #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
12468 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12469 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
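/* Note (added): the shuffle_i64x2/i32x4/f64x2/f32x4 macros build the result
   from whole 128-bit lanes of X and Y selected by the immediate C.  */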
12471 #define _mm256_shuffle_i64x2(X, Y, C) \
12472 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12473 (__v4di)(__m256i)(Y), (int)(C), \
12474 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12475 (__mmask8)-1))
12477 #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
12478 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12479 (__v4di)(__m256i)(Y), (int)(C), \
12480 (__v4di)(__m256i)(W),\
12481 (__mmask8)(U)))
12483 #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
12484 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12485 (__v4di)(__m256i)(Y), (int)(C), \
12486 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12487 (__mmask8)(U)))
12489 #define _mm256_shuffle_i32x4(X, Y, C) \
12490 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12491 (__v8si)(__m256i)(Y), (int)(C), \
12492 (__v8si)(__m256i)_mm256_setzero_si256(), \
12493 (__mmask8)-1))
12495 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
12496 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12497 (__v8si)(__m256i)(Y), (int)(C), \
12498 (__v8si)(__m256i)(W), \
12499 (__mmask8)(U)))
12501 #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
12502 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12503 (__v8si)(__m256i)(Y), (int)(C), \
12504 (__v8si)(__m256i)_mm256_setzero_si256(), \
12505 (__mmask8)(U)))
12507 #define _mm256_shuffle_f64x2(X, Y, C) \
12508 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12509 (__v4df)(__m256d)(Y), (int)(C), \
12510 (__v4df)(__m256d)_mm256_setzero_pd(), \
12511 (__mmask8)-1))
12513 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
12514 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12515 (__v4df)(__m256d)(Y), (int)(C), \
12516 (__v4df)(__m256d)(W), \
12517 (__mmask8)(U)))
12519 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
12520 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12521 (__v4df)(__m256d)(Y), (int)(C), \
12522 (__v4df)(__m256d)_mm256_setzero_pd(), \
12523 (__mmask8)(U)))
12525 #define _mm256_shuffle_f32x4(X, Y, C) \
12526 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12527 (__v8sf)(__m256)(Y), (int)(C), \
12528 (__v8sf)(__m256)_mm256_setzero_ps(), \
12529 (__mmask8)-1))
12531 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
12532 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12533 (__v8sf)(__m256)(Y), (int)(C), \
12534 (__v8sf)(__m256)(W), \
12535 (__mmask8)(U)))
12537 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
12538 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12539 (__v8sf)(__m256)(Y), (int)(C), \
12540 (__v8sf)(__m256)_mm256_setzero_ps(), \
12541 (__mmask8)(U)))
12543 #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
12544 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12545 (__v4df)(__m256d)(B), (int)(C), \
12546 (__v4df)(__m256d)(W), \
12547 (__mmask8)(U)))
12549 #define _mm256_maskz_shuffle_pd(U, A, B, C) \
12550 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12551 (__v4df)(__m256d)(B), (int)(C), \
12552 (__v4df)(__m256d)_mm256_setzero_pd(),\
12553 (__mmask8)(U)))
12555 #define _mm_mask_shuffle_pd(W, U, A, B, C) \
12556 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12557 (__v2df)(__m128d)(B), (int)(C), \
12558 (__v2df)(__m128d)(W), \
12559 (__mmask8)(U)))
12561 #define _mm_maskz_shuffle_pd(U, A, B, C) \
12562 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12563 (__v2df)(__m128d)(B), (int)(C), \
12564 (__v2df)(__m128d)_mm_setzero_pd(), \
12565 (__mmask8)(U)))
12567 #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
12568 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12569 (__v8sf)(__m256)(B), (int)(C), \
12570 (__v8sf)(__m256)(W), \
12571 (__mmask8)(U)))
12573 #define _mm256_maskz_shuffle_ps(U, A, B, C) \
12574 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12575 (__v8sf)(__m256)(B), (int)(C), \
12576 (__v8sf)(__m256)_mm256_setzero_ps(), \
12577 (__mmask8)(U)))
12579 #define _mm_mask_shuffle_ps(W, U, A, B, C) \
12580 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12581 (__v4sf)(__m128)(B), (int)(C), \
12582 (__v4sf)(__m128)(W), \
12583 (__mmask8)(U)))
12585 #define _mm_maskz_shuffle_ps(U, A, B, C) \
12586 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12587 (__v4sf)(__m128)(B), (int)(C), \
12588 (__v4sf)(__m128)_mm_setzero_ps(), \
12589 (__mmask8)(U)))
12591 #define _mm256_fixupimm_pd(X, Y, Z, C) \
12592 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12593 (__v4df)(__m256d)(Y), \
12594 (__v4di)(__m256i)(Z), (int)(C), \
12595 (__mmask8)(-1)))
12597 #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
12598 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12599 (__v4df)(__m256d)(Y), \
12600 (__v4di)(__m256i)(Z), (int)(C), \
12601 (__mmask8)(U)))
12603 #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
12604 ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
12605 (__v4df)(__m256d)(Y), \
12606 (__v4di)(__m256i)(Z), (int)(C),\
12607 (__mmask8)(U)))
12609 #define _mm256_fixupimm_ps(X, Y, Z, C) \
12610 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12611 (__v8sf)(__m256)(Y), \
12612 (__v8si)(__m256i)(Z), (int)(C), \
12613 (__mmask8)(-1)))
12616 #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
12617 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12618 (__v8sf)(__m256)(Y), \
12619 (__v8si)(__m256i)(Z), (int)(C), \
12620 (__mmask8)(U)))
12622 #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
12623 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
12624 (__v8sf)(__m256)(Y), \
12625 (__v8si)(__m256i)(Z), (int)(C),\
12626 (__mmask8)(U)))
12628 #define _mm_fixupimm_pd(X, Y, Z, C) \
12629 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12630 (__v2df)(__m128d)(Y), \
12631 (__v2di)(__m128i)(Z), (int)(C), \
12632 (__mmask8)(-1)))
12635 #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
12636 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12637 (__v2df)(__m128d)(Y), \
12638 (__v2di)(__m128i)(Z), (int)(C), \
12639 (__mmask8)(U)))
12641 #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
12642 ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
12643 (__v2df)(__m128d)(Y), \
12644 (__v2di)(__m128i)(Z), (int)(C),\
12645 (__mmask8)(U)))
12647 #define _mm_fixupimm_ps(X, Y, Z, C) \
12648 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12649 (__v4sf)(__m128)(Y), \
12650 (__v4si)(__m128i)(Z), (int)(C), \
12651 (__mmask8)(-1)))
12653 #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
12654 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12655 (__v4sf)(__m128)(Y), \
12656 (__v4si)(__m128i)(Z), (int)(C),\
12657 (__mmask8)(U)))
12659 #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
12660 ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
12661 (__v4sf)(__m128)(Y), \
12662 (__v4si)(__m128i)(Z), (int)(C),\
12663 (__mmask8)(U)))
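/* Note (added): masked logical shift-by-immediate macros.  Each element is
   shifted by the immediate count; masked-off lanes take the corresponding
   element of W, or are zeroed in the maskz forms.  */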
12665 #define _mm256_mask_srli_epi32(W, U, A, B) \
12666 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12667 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
12669 #define _mm256_maskz_srli_epi32(U, A, B) \
12670 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12671 (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
12673 #define _mm_mask_srli_epi32(W, U, A, B) \
12674 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12675 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12677 #define _mm_maskz_srli_epi32(U, A, B) \
12678 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12679 (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
12681 #define _mm256_mask_srli_epi64(W, U, A, B) \
12682 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12683 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
12685 #define _mm256_maskz_srli_epi64(U, A, B) \
12686 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12687 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
12689 #define _mm_mask_srli_epi64(W, U, A, B) \
12690 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12691 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
12693 #define _mm_maskz_srli_epi64(U, A, B) \
12694 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12695 (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
12697 #define _mm256_mask_slli_epi32(W, U, X, C) \
12698 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12699 (__v8si)(__m256i)(W),\
12700 (__mmask8)(U)))
12702 #define _mm256_maskz_slli_epi32(U, X, C) \
12703 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12704 (__v8si)(__m256i)_mm256_setzero_si256(),\
12705 (__mmask8)(U)))
12707 #define _mm256_mask_slli_epi64(W, U, X, C) \
12708 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12709 (__v4di)(__m256i)(W),\
12710 (__mmask8)(U)))
12712 #define _mm256_maskz_slli_epi64(U, X, C) \
12713 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12714 (__v4di)(__m256i)_mm256_setzero_si256 (),\
12715 (__mmask8)(U)))
12717 #define _mm_mask_slli_epi32(W, U, X, C) \
12718 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12719 (__v4si)(__m128i)(W),\
12720 (__mmask8)(U)))
12722 #define _mm_maskz_slli_epi32(U, X, C) \
12723 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12724 (__v4si)(__m128i)_mm_setzero_si128 (),\
12725 (__mmask8)(U)))
12727 #define _mm_mask_slli_epi64(W, U, X, C) \
12728 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12729 (__v2di)(__m128i)(W),\
12730 (__mmask8)(U)))
12732 #define _mm_maskz_slli_epi64(U, X, C) \
12733 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12734 (__v2di)(__m128i)_mm_setzero_di(),\
12735 (__mmask8)(U)))
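/* Note (added): ternary logic macros.  For every bit position the three
   source bits index into the 8-bit truth table I, so any bitwise function
   of three operands can be expressed.  */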
12737 #define _mm256_ternarylogic_epi64(A, B, C, I) \
12738 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12739 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
12741 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
12742 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12743 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12745 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
12746 ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
12747 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12749 #define _mm256_ternarylogic_epi32(A, B, C, I) \
12750 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12751 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
12753 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
12754 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12755 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12757 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
12758 ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
12759 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12761 #define _mm_ternarylogic_epi64(A, B, C, I) \
12762 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12763 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
12765 #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
12766 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12767 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12769 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
12770 ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
12771 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12773 #define _mm_ternarylogic_epi32(A, B, C, I) \
12774 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12775 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
12777 #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
12778 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12779 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
12781 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
12782 ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
12783 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
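/* Note (added): roundscale macros round each element according to the
   precision and rounding-mode controls encoded in the immediate B.  */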
12785 #define _mm256_roundscale_ps(A, B) \
12786 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12787 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
12789 #define _mm256_mask_roundscale_ps(W, U, A, B) \
12790 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12791 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
12793 #define _mm256_maskz_roundscale_ps(U, A, B) \
12794 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12795 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
12797 #define _mm256_roundscale_pd(A, B) \
12798 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12799 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
12801 #define _mm256_mask_roundscale_pd(W, U, A, B) \
12802 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12803 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
12805 #define _mm256_maskz_roundscale_pd(U, A, B) \
12806 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12807 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
12809 #define _mm_roundscale_ps(A, B) \
12810 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12811 (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
12813 #define _mm_mask_roundscale_ps(W, U, A, B) \
12814 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12815 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
12817 #define _mm_maskz_roundscale_ps(U, A, B) \
12818 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12819 (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
12821 #define _mm_roundscale_pd(A, B) \
12822 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12823 (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
12825 #define _mm_mask_roundscale_pd(W, U, A, B) \
12826 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12827 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
12829 #define _mm_maskz_roundscale_pd(U, A, B) \
12830 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12831 (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
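/* Note (added): the getmant macros extract the normalized mantissa of each
   element; the interval selector B and the sign control C are packed into a
   single immediate as (C << 2) | B.  */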
12833 #define _mm256_getmant_ps(X, B, C) \
12834 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12835 (int)(((C)<<2) | (B)), \
12836 (__v8sf)(__m256)_mm256_setzero_ps(), \
12837 (__mmask8)-1))
12839 #define _mm256_mask_getmant_ps(W, U, X, B, C) \
12840 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12841 (int)(((C)<<2) | (B)), \
12842 (__v8sf)(__m256)(W), \
12843 (__mmask8)(U)))
12845 #define _mm256_maskz_getmant_ps(U, X, B, C) \
12846 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12847 (int)(((C)<<2) | (B)), \
12848 (__v8sf)(__m256)_mm256_setzero_ps(), \
12849 (__mmask8)(U)))
12851 #define _mm_getmant_ps(X, B, C) \
12852 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12853 (int)(((C)<<2) | (B)), \
12854 (__v4sf)(__m128)_mm_setzero_ps(), \
12855 (__mmask8)-1))
12857 #define _mm_mask_getmant_ps(W, U, X, B, C) \
12858 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12859 (int)(((C)<<2) | (B)), \
12860 (__v4sf)(__m128)(W), \
12861 (__mmask8)(U)))
12863 #define _mm_maskz_getmant_ps(U, X, B, C) \
12864 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12865 (int)(((C)<<2) | (B)), \
12866 (__v4sf)(__m128)_mm_setzero_ps(), \
12867 (__mmask8)(U)))
12869 #define _mm256_getmant_pd(X, B, C) \
12870 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12871 (int)(((C)<<2) | (B)), \
12872 (__v4df)(__m256d)_mm256_setzero_pd(), \
12873 (__mmask8)-1))
12875 #define _mm256_mask_getmant_pd(W, U, X, B, C) \
12876 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12877 (int)(((C)<<2) | (B)), \
12878 (__v4df)(__m256d)(W), \
12879 (__mmask8)(U)))
12881 #define _mm256_maskz_getmant_pd(U, X, B, C) \
12882 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12883 (int)(((C)<<2) | (B)), \
12884 (__v4df)(__m256d)_mm256_setzero_pd(), \
12885 (__mmask8)(U)))
12887 #define _mm_getmant_pd(X, B, C) \
12888 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12889 (int)(((C)<<2) | (B)), \
12890 (__v2df)(__m128d)_mm_setzero_pd(), \
12891 (__mmask8)-1))
12893 #define _mm_mask_getmant_pd(W, U, X, B, C) \
12894 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12895 (int)(((C)<<2) | (B)), \
12896 (__v2df)(__m128d)(W), \
12897 (__mmask8)(U)))
12899 #define _mm_maskz_getmant_pd(U, X, B, C) \
12900 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12901 (int)(((C)<<2) | (B)), \
12902 (__v2df)(__m128d)_mm_setzero_pd(), \
12903 (__mmask8)(U)))
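/* Note (added): masked gather macros.  For each lane whose bit in MASK is
   set, an element is loaded from ADDR + INDEX[i] * SCALE; lanes with a clear
   mask bit keep the corresponding element of V1OLD.  SCALE must be 1, 2, 4
   or 8.  */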
12905 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12906 (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD, \
12907 (float const *)ADDR, \
12908 (__v8si)(__m256i)INDEX, \
12909 (__mmask8)MASK, (int)SCALE)
12911 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12912 (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD, \
12913 (float const *)ADDR, \
12914 (__v4si)(__m128i)INDEX, \
12915 (__mmask8)MASK, (int)SCALE)
12917 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12918 (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD, \
12919 (double const *)ADDR, \
12920 (__v4si)(__m128i)INDEX, \
12921 (__mmask8)MASK, (int)SCALE)
12923 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12924 (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD, \
12925 (double const *)ADDR, \
12926 (__v4si)(__m128i)INDEX, \
12927 (__mmask8)MASK, (int)SCALE)
12929 #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12930 (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD, \
12931 (float const *)ADDR, \
12932 (__v4di)(__m256i)INDEX, \
12933 (__mmask8)MASK, (int)SCALE)
12935 #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12936 (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD, \
12937 (float const *)ADDR, \
12938 (__v2di)(__m128i)INDEX, \
12939 (__mmask8)MASK, (int)SCALE)
12941 #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12942 (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD, \
12943 (double const *)ADDR, \
12944 (__v4di)(__m256i)INDEX, \
12945 (__mmask8)MASK, (int)SCALE)
12947 #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12948 (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD, \
12949 (double const *)ADDR, \
12950 (__v2di)(__m128i)INDEX, \
12951 (__mmask8)MASK, (int)SCALE)
12953 #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12954 (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD, \
12955 (int const *)ADDR, \
12956 (__v8si)(__m256i)INDEX, \
12957 (__mmask8)MASK, (int)SCALE)
12959 #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12960 (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD, \
12961 (int const *)ADDR, \
12962 (__v4si)(__m128i)INDEX, \
12963 (__mmask8)MASK, (int)SCALE)
12965 #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12966 (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD, \
12967 (long long const *)ADDR, \
12968 (__v4si)(__m128i)INDEX, \
12969 (__mmask8)MASK, (int)SCALE)
12971 #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12972 (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD, \
12973 (long long const *)ADDR, \
12974 (__v4si)(__m128i)INDEX, \
12975 (__mmask8)MASK, (int)SCALE)
12977 #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12978 (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD, \
12979 (int const *)ADDR, \
12980 (__v4di)(__m256i)INDEX, \
12981 (__mmask8)MASK, (int)SCALE)
12983 #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12984 (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD, \
12985 (int const *)ADDR, \
12986 (__v2di)(__m128i)INDEX, \
12987 (__mmask8)MASK, (int)SCALE)
12989 #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12990 (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD, \
12991 (long long const *)ADDR, \
12992 (__v4di)(__m256i)INDEX, \
12993 (__mmask8)MASK, (int)SCALE)
12995 #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12996 (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD, \
12997 (long long const *)ADDR, \
12998 (__v2di)(__m128i)INDEX, \
12999 (__mmask8)MASK, (int)SCALE)
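/* Note (added): scatter macros store element i of V1 to
   ADDR + INDEX[i] * SCALE when bit i of the mask is set; the unmasked forms
   simply pass an all-ones 0xFF mask.  */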
13001 #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
13002 __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)0xFF, \
13003 (__v8si)(__m256i)INDEX, \
13004 (__v8sf)(__m256)V1, (int)SCALE)
13006 #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13007 __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)MASK, \
13008 (__v8si)(__m256i)INDEX, \
13009 (__v8sf)(__m256)V1, (int)SCALE)
13011 #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
13012 __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)0xFF, \
13013 (__v4si)(__m128i)INDEX, \
13014 (__v4sf)(__m128)V1, (int)SCALE)
13016 #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13017 __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)MASK, \
13018 (__v4si)(__m128i)INDEX, \
13019 (__v4sf)(__m128)V1, (int)SCALE)
13021 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
13022 __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)0xFF, \
13023 (__v4si)(__m128i)INDEX, \
13024 (__v4df)(__m256d)V1, (int)SCALE)
13026 #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13027 __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)MASK, \
13028 (__v4si)(__m128i)INDEX, \
13029 (__v4df)(__m256d)V1, (int)SCALE)
13031 #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
13032 __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)0xFF, \
13033 (__v4si)(__m128i)INDEX, \
13034 (__v2df)(__m128d)V1, (int)SCALE)
13036 #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13037 __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)MASK, \
13038 (__v4si)(__m128i)INDEX, \
13039 (__v2df)(__m128d)V1, (int)SCALE)
13041 #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
13042 __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)0xFF, \
13043 (__v4di)(__m256i)INDEX, \
13044 (__v4sf)(__m128)V1, (int)SCALE)
13046 #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13047 __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)MASK, \
13048 (__v4di)(__m256i)INDEX, \
13049 (__v4sf)(__m128)V1, (int)SCALE)
13051 #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
13052 __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)0xFF, \
13053 (__v2di)(__m128i)INDEX, \
13054 (__v4sf)(__m128)V1, (int)SCALE)
13056 #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13057 __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)MASK, \
13058 (__v2di)(__m128i)INDEX, \
13059 (__v4sf)(__m128)V1, (int)SCALE)
13061 #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
13062 __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)0xFF, \
13063 (__v4di)(__m256i)INDEX, \
13064 (__v4df)(__m256d)V1, (int)SCALE)
13066 #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13067 __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)MASK, \
13068 (__v4di)(__m256i)INDEX, \
13069 (__v4df)(__m256d)V1, (int)SCALE)
13071 #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
13072 __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)0xFF, \
13073 (__v2di)(__m128i)INDEX, \
13074 (__v2df)(__m128d)V1, (int)SCALE)
13076 #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
13077 __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)MASK, \
13078 (__v2di)(__m128i)INDEX, \
13079 (__v2df)(__m128d)V1, (int)SCALE)
13081 #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13082 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)0xFF, \
13083 (__v8si)(__m256i)INDEX, \
13084 (__v8si)(__m256i)V1, (int)SCALE)
13086 #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13087 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)MASK, \
13088 (__v8si)(__m256i)INDEX, \
13089 (__v8si)(__m256i)V1, (int)SCALE)
13091 #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13092 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)0xFF, \
13093 (__v4si)(__m128i)INDEX, \
13094 (__v4si)(__m128i)V1, (int)SCALE)
13096 #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13097 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)MASK, \
13098 (__v4si)(__m128i)INDEX, \
13099 (__v4si)(__m128i)V1, (int)SCALE)
13101 #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
13102 __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)0xFF, \
13103 (__v4si)(__m128i)INDEX, \
13104 (__v4di)(__m256i)V1, (int)SCALE)
13106 #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13107 __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)MASK, \
13108 (__v4si)(__m128i)INDEX, \
13109 (__v4di)(__m256i)V1, (int)SCALE)
13111 #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
13112 __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)0xFF, \
13113 (__v4si)(__m128i)INDEX, \
13114 (__v2di)(__m128i)V1, (int)SCALE)
13116 #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13117 __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)MASK, \
13118 (__v4si)(__m128i)INDEX, \
13119 (__v2di)(__m128i)V1, (int)SCALE)
13121 #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
13122 __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)0xFF, \
13123 (__v4di)(__m256i)INDEX, \
13124 (__v4si)(__m128i)V1, (int)SCALE)
13126 #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13127 __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)MASK, \
13128 (__v4di)(__m256i)INDEX, \
13129 (__v4si)(__m128i)V1, (int)SCALE)
13131 #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
13132 __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)0xFF, \
13133 (__v2di)(__m128i)INDEX, \
13134 (__v4si)(__m128i)V1, (int)SCALE)
13136 #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13137 __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)MASK, \
13138 (__v2di)(__m128i)INDEX, \
13139 (__v4si)(__m128i)V1, (int)SCALE)
13141 #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
13142 __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)0xFF, \
13143 (__v4di)(__m256i)INDEX, \
13144 (__v4di)(__m256i)V1, (int)SCALE)
13146 #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13147 __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)MASK, \
13148 (__v4di)(__m256i)INDEX, \
13149 (__v4di)(__m256i)V1, (int)SCALE)
13151 #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
13152 __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)0xFF, \
13153 (__v2di)(__m128i)INDEX, \
13154 (__v2di)(__m128i)V1, (int)SCALE)
13156 #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
13157 __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)MASK, \
13158 (__v2di)(__m128i)INDEX, \
13159 (__v2di)(__m128i)V1, (int)SCALE)
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
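
/* Usage sketch (illustrative; w and x are hypothetical variables).  C is an
   immediate in _MM_SHUFFLE order, exactly as for _mm_shuffle_epi32; lanes
   whose mask bit is set take the shuffled value, the others are copied
   from W:

     __m128i r = _mm_mask_shuffle_epi32 (w, (__mmask8) 0x9, x,
                                         _MM_SHUFFLE (0, 1, 2, 3));
*/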
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)(U)))

#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_di(), \
      (__mmask8)(U)))
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256(), \
      (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
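
/* Usage sketch (illustrative; a is a hypothetical variable).  B is the
   rotate count, taken modulo the lane width; the *_ror_* forms rotate
   right.  Rotate each 64-bit lane of a left by 13 bits:

     __m256i r = _mm256_rol_epi64 (a, 13);
*/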
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
    (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)(U)))

#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
    (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)(U)))

#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
    (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(), \
    (__mmask8)(U)))

#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))

#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(), \
    (__mmask8)(U)))
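
/* Usage sketch (illustrative; x and y are hypothetical variables).  The
   alignr macros concatenate X (high half) with Y (low half), shift the pair
   right by C elements and keep the low half, so

     __m256i r = _mm256_alignr_epi32 (x, y, 3);

   yields lanes 3..7 of y followed by lanes 0..2 of x.  The masked forms
   merge with W, or with zero, according to the mask, as above.  */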
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128)(A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128)(A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256)(A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256)(A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
    (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
    (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
    (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
    (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))

#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
    (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
    (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
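
/* Usage sketch (illustrative; w and a are hypothetical variables).
   Arithmetic-shift each 64-bit lane of a right by 3 bits, taking lanes
   with a clear mask bit from w:

     __m128i r = _mm_mask_srai_epi64 (w, (__mmask8) 0x1, a, 3);

   The epi64 immediate shifts are new with AVX-512; the epi32 macros only
   add masking to the pre-existing shifts.  */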
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
      (__v4df)(__m256d)_mm256_setzero_pd(), \
      (__mmask8)(U)))

#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
      (__v8sf)(__m256)_mm256_setzero_ps(), \
      (__mmask8)(U)))

#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
      (__v2df)(__m128d)_mm_setzero_pd(), \
      (__mmask8)(U)))

#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
      (__v4sf)(__m128)_mm_setzero_ps(), \
      (__mmask8)(U)))
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
      (__v4df) (__W), \
      (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
      (__v8sf) (__W), \
      (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
      (__v4di) (__W), \
      (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
      (__v8si) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
      (__v2df) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
      (__v4sf) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
      (__v2di) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
      (__v4si) (__W), \
      (__mmask8) (__U)))
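
/* Usage sketch (illustrative; a and w are hypothetical variables).  Blend
   selects per element: lanes whose mask bit is set come from __W, the rest
   from __A:

     __m256d r = _mm256_mask_blend_pd ((__mmask8) 0x6, a, w);
*/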
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), (int)(P), \
      (__mmask8)(M)))
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M)))
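
/* Usage sketch (illustrative; x, y, a, b and k0 are hypothetical variables).
   P selects the predicate: _MM_CMPINT_* for the integer comparisons, _CMP_*
   (as for vcmpps/vcmppd) for the float/double ones.  Bit i of the result
   reports the comparison of element i; the masked forms AND the result
   with M:

     __mmask8 k1 = _mm256_cmp_epi32_mask (x, y, _MM_CMPINT_LT);
     __mmask8 k2 = _mm_mask_cmp_pd_mask (k0, a, b, _CMP_NEQ_UQ);
*/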
#endif

#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))

#ifdef __DISABLE_AVX512VL__
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */

#endif /* _AVX512VLINTRIN_H_INCLUDED */