PR target/87674
[official-gcc.git] / gcc / config / i386 / avx512vlintrin.h
/* Copyright (C) 2014-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512VLINTRIN_H_INCLUDED
#define _AVX512VLINTRIN_H_INCLUDED

#ifndef __AVX512VL__
#pragma GCC push_options
#pragma GCC target("avx512vl")
#define __DISABLE_AVX512VL__
#endif /* __AVX512VL__ */
/* Internal data types for implementing the intrinsics.  */
typedef unsigned int __mmask32;
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
                                                  (__v4df) __W,
                                                  (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
{
  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
                                                  (__v4df)
                                                  _mm256_setzero_pd (),
                                                  (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
                                                  (__v2df) __W,
                                                  (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
{
  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
                                                  (__v2df)
                                                  _mm_setzero_pd (),
                                                  (__mmask8) __U);
}
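
/* Usage sketch (illustrative, not part of the header): a merge-masking
   move keeps the lanes of __W whose mask bit is 0 and copies the lanes
   of __A whose mask bit is 1, while the zero-masking form clears the
   unselected lanes.  The mask and lane values below are examples only.

     __m256d old_v = _mm256_set1_pd (1.0);
     __m256d new_v = _mm256_set1_pd (2.0);
     __m256d a = _mm256_mask_mov_pd (old_v, 0x5, new_v);   // {2, 1, 2, 1}
     __m256d b = _mm256_maskz_mov_pd (0x5, new_v);         // {2, 0, 2, 0}
*/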
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
{
  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
                                                   (__v4df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
                                                   (__v4df)
                                                   _mm256_setzero_pd (),
                                                   (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
{
  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
                                                   (__v2df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
                                                   (__v2df)
                                                   _mm_setzero_pd (),
                                                   (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
{
  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
                                   (__v4df) __A,
                                   (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
                                   (__v2df) __A,
                                   (__mmask8) __U);
}
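
/* Usage sketch (illustrative): the load/store intrinsics above wrap the
   aligned forms (vmovapd), so __P is assumed to be suitably aligned for
   the vector width.  Masked-off lanes are not written by the store and
   are merged or zeroed by the load.  Buffer and masks are assumptions.

     double buf[4] __attribute__ ((aligned (32))) = { 1, 2, 3, 4 };
     __m256d v = _mm256_maskz_load_pd (0x3, buf);          // {1, 2, 0, 0}
     _mm256_mask_store_pd (buf, 0xC, _mm256_set1_pd (9.0));
     // buf is now { 1, 2, 9, 9 }
*/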
134 extern __inline __m256
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
138 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139 (__v8sf) __W,
140 (__mmask8) __U);
143 extern __inline __m256
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf)
149 _mm256_setzero_ps (),
150 (__mmask8) __U);
153 extern __inline __m128
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
157 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158 (__v4sf) __W,
159 (__mmask8) __U);
162 extern __inline __m128
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf)
168 _mm_setzero_ps (),
169 (__mmask8) __U);
172 extern __inline __m256
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
176 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177 (__v8sf) __W,
178 (__mmask8) __U);
181 extern __inline __m256
182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf)
187 _mm256_setzero_ps (),
188 (__mmask8) __U);
191 extern __inline __m128
192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
195 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196 (__v4sf) __W,
197 (__mmask8) __U);
200 extern __inline __m128
201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf)
206 _mm_setzero_ps (),
207 (__mmask8) __U);
210 extern __inline void
211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
214 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215 (__v8sf) __A,
216 (__mmask8) __U);
219 extern __inline void
220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
223 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224 (__v4sf) __A,
225 (__mmask8) __U);
228 extern __inline __m256i
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
232 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233 (__v4di) __W,
234 (__mmask8) __U);
237 extern __inline __m256i
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di)
243 _mm256_setzero_si256 (),
244 (__mmask8) __U);
247 extern __inline __m128i
248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
251 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252 (__v2di) __W,
253 (__mmask8) __U);
256 extern __inline __m128i
257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di)
262 _mm_setzero_si128 (),
263 (__mmask8) __U);
266 extern __inline __m256i
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
270 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271 (__v4di) __W,
272 (__mmask8)
273 __U);
276 extern __inline __m256i
277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
280 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281 (__v4di)
282 _mm256_setzero_si256 (),
283 (__mmask8)
284 __U);
287 extern __inline __m128i
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
291 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292 (__v2di) __W,
293 (__mmask8)
294 __U);
297 extern __inline __m128i
298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
301 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302 (__v2di)
303 _mm_setzero_si128 (),
304 (__mmask8)
305 __U);
308 extern __inline void
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313 (__v4di) __A,
314 (__mmask8) __U);
317 extern __inline void
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
321 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322 (__v2di) __A,
323 (__mmask8) __U);
326 extern __inline __m256i
327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
330 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331 (__v8si) __W,
332 (__mmask8) __U);
335 extern __inline __m256i
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si)
341 _mm256_setzero_si256 (),
342 (__mmask8) __U);
345 extern __inline __m128i
346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
349 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350 (__v4si) __W,
351 (__mmask8) __U);
354 extern __inline __m128i
355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si)
360 _mm_setzero_si128 (),
361 (__mmask8) __U);
364 extern __inline __m256i
365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
368 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369 (__v8si) __W,
370 (__mmask8)
371 __U);
374 extern __inline __m256i
375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
378 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379 (__v8si)
380 _mm256_setzero_si256 (),
381 (__mmask8)
382 __U);
385 extern __inline __m128i
386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
389 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390 (__v4si) __W,
391 (__mmask8)
392 __U);
395 extern __inline __m128i
396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
399 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400 (__v4si)
401 _mm_setzero_si128 (),
402 (__mmask8)
403 __U);
406 extern __inline void
407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
410 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411 (__v8si) __A,
412 (__mmask8) __U);
415 extern __inline void
416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
419 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420 (__v4si) __A,
421 (__mmask8) __U);
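/* Usage sketch (illustrative): the epi32/epi64 mov/load/store variants
   follow the same merge/zero masking pattern, applied to whole 32- or
   64-bit lanes.  The array and masks below are assumptions.

     int data[8] __attribute__ ((aligned (32))) = { 0, 1, 2, 3, 4, 5, 6, 7 };
     __m256i lo = _mm256_maskz_load_epi32 (0x0F, data);    // {0,1,2,3,0,0,0,0}
     _mm256_mask_store_epi32 (data, 0xF0, _mm256_set1_epi32 (-1));
     // data is now { 0, 1, 2, 3, -1, -1, -1, -1 }
*/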
424 extern __inline __m128d
425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
428 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429 (__v2df) __B,
430 (__v2df) __W,
431 (__mmask8) __U);
434 extern __inline __m128d
435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
438 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439 (__v2df) __B,
440 (__v2df)
441 _mm_setzero_pd (),
442 (__mmask8) __U);
445 extern __inline __m256d
446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448 __m256d __B)
450 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451 (__v4df) __B,
452 (__v4df) __W,
453 (__mmask8) __U);
456 extern __inline __m256d
457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
460 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461 (__v4df) __B,
462 (__v4df)
463 _mm256_setzero_pd (),
464 (__mmask8) __U);
467 extern __inline __m128
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
471 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472 (__v4sf) __B,
473 (__v4sf) __W,
474 (__mmask8) __U);
477 extern __inline __m128
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
481 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482 (__v4sf) __B,
483 (__v4sf)
484 _mm_setzero_ps (),
485 (__mmask8) __U);
488 extern __inline __m256
489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490 _mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
492 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493 (__v8sf) __B,
494 (__v8sf) __W,
495 (__mmask8) __U);
498 extern __inline __m256
499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
500 _mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
502 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503 (__v8sf) __B,
504 (__v8sf)
505 _mm256_setzero_ps (),
506 (__mmask8) __U);
509 extern __inline __m128d
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
513 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514 (__v2df) __B,
515 (__v2df) __W,
516 (__mmask8) __U);
519 extern __inline __m128d
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
523 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524 (__v2df) __B,
525 (__v2df)
526 _mm_setzero_pd (),
527 (__mmask8) __U);
530 extern __inline __m256d
531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533 __m256d __B)
535 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536 (__v4df) __B,
537 (__v4df) __W,
538 (__mmask8) __U);
541 extern __inline __m256d
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
545 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546 (__v4df) __B,
547 (__v4df)
548 _mm256_setzero_pd (),
549 (__mmask8) __U);
552 extern __inline __m128
553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554 _mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
556 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557 (__v4sf) __B,
558 (__v4sf) __W,
559 (__mmask8) __U);
562 extern __inline __m128
563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564 _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
566 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567 (__v4sf) __B,
568 (__v4sf)
569 _mm_setzero_ps (),
570 (__mmask8) __U);
573 extern __inline __m256
574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575 _mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
577 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578 (__v8sf) __B,
579 (__v8sf) __W,
580 (__mmask8) __U);
583 extern __inline __m256
584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
587 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588 (__v8sf) __B,
589 (__v8sf)
590 _mm256_setzero_ps (),
591 (__mmask8) __U);
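/* Usage sketch (illustrative): the masked arithmetic intrinsics take the
   writemask the same way; with the _maskz forms the unselected lanes
   become 0.0 instead of keeping a previous value.  Values are examples.

     __m256 x = _mm256_set1_ps (1.5f);
     __m256 y = _mm256_set1_ps (2.5f);
     __m256 sum  = _mm256_maskz_add_ps (0x0F, x, y);   // lanes 0-3 = 4.0f, lanes 4-7 = 0.0f
     __m256 diff = _mm256_mask_sub_ps (x, 0xF0, y, x); // lanes 4-7 = 1.0f, lanes 0-3 keep 1.5f
*/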
594 extern __inline void
595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596 _mm256_store_epi64 (void *__P, __m256i __A)
598 *(__m256i *) __P = __A;
601 extern __inline void
602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603 _mm_store_epi64 (void *__P, __m128i __A)
605 *(__m128i *) __P = __A;
608 extern __inline __m256d
609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
612 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
613 (__v4df) __W,
614 (__mmask8) __U);
617 extern __inline __m256d
618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
621 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
622 (__v4df)
623 _mm256_setzero_pd (),
624 (__mmask8) __U);
627 extern __inline __m128d
628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
631 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
632 (__v2df) __W,
633 (__mmask8) __U);
636 extern __inline __m128d
637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
640 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
641 (__v2df)
642 _mm_setzero_pd (),
643 (__mmask8) __U);
646 extern __inline void
647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
650 __builtin_ia32_storeupd256_mask ((double *) __P,
651 (__v4df) __A,
652 (__mmask8) __U);
655 extern __inline void
656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
659 __builtin_ia32_storeupd128_mask ((double *) __P,
660 (__v2df) __A,
661 (__mmask8) __U);
664 extern __inline __m256
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
668 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
669 (__v8sf) __W,
670 (__mmask8) __U);
673 extern __inline __m256
674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
677 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
678 (__v8sf)
679 _mm256_setzero_ps (),
680 (__mmask8) __U);
683 extern __inline __m128
684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
687 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
688 (__v4sf) __W,
689 (__mmask8) __U);
692 extern __inline __m128
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
696 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
697 (__v4sf)
698 _mm_setzero_ps (),
699 (__mmask8) __U);
702 extern __inline void
703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
706 __builtin_ia32_storeups256_mask ((float *) __P,
707 (__v8sf) __A,
708 (__mmask8) __U);
711 extern __inline void
712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
715 __builtin_ia32_storeups128_mask ((float *) __P,
716 (__v4sf) __A,
717 (__mmask8) __U);
720 extern __inline __m256i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
724 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
725 (__v4di) __W,
726 (__mmask8) __U);
729 extern __inline __m256i
730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
733 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
734 (__v4di)
735 _mm256_setzero_si256 (),
736 (__mmask8) __U);
739 extern __inline __m128i
740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
743 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
744 (__v2di) __W,
745 (__mmask8) __U);
748 extern __inline __m128i
749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
752 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
753 (__v2di)
754 _mm_setzero_si128 (),
755 (__mmask8) __U);
758 extern __inline void
759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
762 __builtin_ia32_storedqudi256_mask ((long long *) __P,
763 (__v4di) __A,
764 (__mmask8) __U);
767 extern __inline void
768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
771 __builtin_ia32_storedqudi128_mask ((long long *) __P,
772 (__v2di) __A,
773 (__mmask8) __U);
776 extern __inline __m256i
777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
780 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
781 (__v8si) __W,
782 (__mmask8) __U);
785 extern __inline __m256i
786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
789 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
790 (__v8si)
791 _mm256_setzero_si256 (),
792 (__mmask8) __U);
795 extern __inline __m128i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
799 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
800 (__v4si) __W,
801 (__mmask8) __U);
804 extern __inline __m128i
805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
806 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
808 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
809 (__v4si)
810 _mm_setzero_si128 (),
811 (__mmask8) __U);
814 extern __inline void
815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
818 __builtin_ia32_storedqusi256_mask ((int *) __P,
819 (__v8si) __A,
820 (__mmask8) __U);
823 extern __inline void
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
827 __builtin_ia32_storedqusi128_mask ((int *) __P,
828 (__v4si) __A,
829 (__mmask8) __U);
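/* Usage sketch (illustrative): the unaligned loadu/storeu forms are the
   usual way to handle a loop remainder of n < 8 ints without reading or
   writing past the end of the buffer.  The helper below is hypothetical.

     static inline __m256i
     load_tail_epi32 (const int *p, int n)   // 0 <= n <= 8
     {
       __mmask8 m = (__mmask8) ((1u << n) - 1);
       return _mm256_maskz_loadu_epi32 (m, p);
     }
*/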
832 extern __inline __m256i
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
836 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
837 (__v8si) __W,
838 (__mmask8) __U);
841 extern __inline __m256i
842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
845 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
846 (__v8si)
847 _mm256_setzero_si256 (),
848 (__mmask8) __U);
851 extern __inline __m128i
852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
855 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
856 (__v4si) __W,
857 (__mmask8) __U);
860 extern __inline __m128i
861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
864 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
865 (__v4si)
866 _mm_setzero_si128 (),
867 (__mmask8) __U);
870 extern __inline __m256i
871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872 _mm256_abs_epi64 (__m256i __A)
874 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
875 (__v4di)
876 _mm256_setzero_si256 (),
877 (__mmask8) -1);
880 extern __inline __m256i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
884 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
885 (__v4di) __W,
886 (__mmask8) __U);
889 extern __inline __m256i
890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
893 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
894 (__v4di)
895 _mm256_setzero_si256 (),
896 (__mmask8) __U);
899 extern __inline __m128i
900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901 _mm_abs_epi64 (__m128i __A)
903 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
904 (__v2di)
905 _mm_setzero_si128 (),
906 (__mmask8) -1);
909 extern __inline __m128i
910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
913 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
914 (__v2di) __W,
915 (__mmask8) __U);
918 extern __inline __m128i
919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
920 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
922 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
923 (__v2di)
924 _mm_setzero_si128 (),
925 (__mmask8) __U);
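/* Usage sketch (illustrative): the abs intrinsics follow the same
   pattern; AVX-512VL adds the 64-bit element forms.  Values and mask
   below are examples only (element order shown low to high).

     __m256i v = _mm256_set_epi64x (-4, 3, -2, 1);     // elements {1, -2, 3, -4}
     __m256i r = _mm256_maskz_abs_epi64 (0x5, v);      // elements {1, 0, 3, 0}
*/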
928 extern __inline __m128i
929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930 _mm256_cvtpd_epu32 (__m256d __A)
932 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
933 (__v4si)
934 _mm_setzero_si128 (),
935 (__mmask8) -1);
938 extern __inline __m128i
939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
942 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
943 (__v4si) __W,
944 (__mmask8) __U);
947 extern __inline __m128i
948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
951 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
952 (__v4si)
953 _mm_setzero_si128 (),
954 (__mmask8) __U);
957 extern __inline __m128i
958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959 _mm_cvtpd_epu32 (__m128d __A)
961 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
962 (__v4si)
963 _mm_setzero_si128 (),
964 (__mmask8) -1);
967 extern __inline __m128i
968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
971 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
972 (__v4si) __W,
973 (__mmask8) __U);
976 extern __inline __m128i
977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
980 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
981 (__v4si)
982 _mm_setzero_si128 (),
983 (__mmask8) __U);
986 extern __inline __m256i
987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
990 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
991 (__v8si) __W,
992 (__mmask8) __U);
995 extern __inline __m256i
996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
999 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1000 (__v8si)
1001 _mm256_setzero_si256 (),
1002 (__mmask8) __U);
1005 extern __inline __m128i
1006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1009 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1010 (__v4si) __W,
1011 (__mmask8) __U);
1014 extern __inline __m128i
1015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1018 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1019 (__v4si)
1020 _mm_setzero_si128 (),
1021 (__mmask8) __U);
1024 extern __inline __m256i
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 _mm256_cvttps_epu32 (__m256 __A)
1028 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1029 (__v8si)
1030 _mm256_setzero_si256 (),
1031 (__mmask8) -1);
1034 extern __inline __m256i
1035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1038 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1039 (__v8si) __W,
1040 (__mmask8) __U);
1043 extern __inline __m256i
1044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1045 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1047 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1048 (__v8si)
1049 _mm256_setzero_si256 (),
1050 (__mmask8) __U);
1053 extern __inline __m128i
1054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055 _mm_cvttps_epu32 (__m128 __A)
1057 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1058 (__v4si)
1059 _mm_setzero_si128 (),
1060 (__mmask8) -1);
1063 extern __inline __m128i
1064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1067 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1068 (__v4si) __W,
1069 (__mmask8) __U);
1072 extern __inline __m128i
1073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1076 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1077 (__v4si)
1078 _mm_setzero_si128 (),
1079 (__mmask8) __U);
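/* Usage sketch (illustrative): the cvtt* intrinsics truncate toward
   zero, and the *_epu32 forms produce unsigned results, so the inputs
   are assumed non-negative here.  The value is an example only.

     __m256 f = _mm256_set1_ps (3.9f);
     __m256i t = _mm256_maskz_cvttps_epu32 (0xFF, f);  // every lane = 3
*/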
1082 extern __inline __m128i
1083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1086 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1087 (__v4si) __W,
1088 (__mmask8) __U);
1091 extern __inline __m128i
1092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1095 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1096 (__v4si)
1097 _mm_setzero_si128 (),
1098 (__mmask8) __U);
1101 extern __inline __m128i
1102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1105 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1106 (__v4si) __W,
1107 (__mmask8) __U);
1110 extern __inline __m128i
1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1114 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1115 (__v4si)
1116 _mm_setzero_si128 (),
1117 (__mmask8) __U);
1120 extern __inline __m128i
1121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 _mm256_cvttpd_epu32 (__m256d __A)
1124 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1125 (__v4si)
1126 _mm_setzero_si128 (),
1127 (__mmask8) -1);
1130 extern __inline __m128i
1131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1134 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1135 (__v4si) __W,
1136 (__mmask8) __U);
1139 extern __inline __m128i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1143 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1144 (__v4si)
1145 _mm_setzero_si128 (),
1146 (__mmask8) __U);
1149 extern __inline __m128i
1150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm_cvttpd_epu32 (__m128d __A)
1153 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1154 (__v4si)
1155 _mm_setzero_si128 (),
1156 (__mmask8) -1);
1159 extern __inline __m128i
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1163 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1164 (__v4si) __W,
1165 (__mmask8) __U);
1168 extern __inline __m128i
1169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1170 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1172 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1173 (__v4si)
1174 _mm_setzero_si128 (),
1175 (__mmask8) __U);
1178 extern __inline __m128i
1179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1182 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1183 (__v4si) __W,
1184 (__mmask8) __U);
1187 extern __inline __m128i
1188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1191 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1192 (__v4si)
1193 _mm_setzero_si128 (),
1194 (__mmask8) __U);
1197 extern __inline __m128i
1198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1201 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1202 (__v4si) __W,
1203 (__mmask8) __U);
1206 extern __inline __m128i
1207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1210 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1211 (__v4si)
1212 _mm_setzero_si128 (),
1213 (__mmask8) __U);
1216 extern __inline __m256d
1217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1220 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1221 (__v4df) __W,
1222 (__mmask8) __U);
1225 extern __inline __m256d
1226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1229 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1230 (__v4df)
1231 _mm256_setzero_pd (),
1232 (__mmask8) __U);
1235 extern __inline __m128d
1236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1239 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1240 (__v2df) __W,
1241 (__mmask8) __U);
1244 extern __inline __m128d
1245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1248 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1249 (__v2df)
1250 _mm_setzero_pd (),
1251 (__mmask8) __U);
1254 extern __inline __m256d
1255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256 _mm256_cvtepu32_pd (__m128i __A)
1258 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1259 (__v4df)
1260 _mm256_setzero_pd (),
1261 (__mmask8) -1);
1264 extern __inline __m256d
1265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1268 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1269 (__v4df) __W,
1270 (__mmask8) __U);
1273 extern __inline __m256d
1274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1277 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1278 (__v4df)
1279 _mm256_setzero_pd (),
1280 (__mmask8) __U);
1283 extern __inline __m128d
1284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1285 _mm_cvtepu32_pd (__m128i __A)
1287 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1288 (__v2df)
1289 _mm_setzero_pd (),
1290 (__mmask8) -1);
1293 extern __inline __m128d
1294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1297 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1298 (__v2df) __W,
1299 (__mmask8) __U);
1302 extern __inline __m128d
1303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1306 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1307 (__v2df)
1308 _mm_setzero_pd (),
1309 (__mmask8) __U);
1312 extern __inline __m256
1313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1316 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1317 (__v8sf) __W,
1318 (__mmask8) __U);
1321 extern __inline __m256
1322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1323 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
1325 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1326 (__v8sf)
1327 _mm256_setzero_ps (),
1328 (__mmask8) __U);
1331 extern __inline __m128
1332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1335 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1336 (__v4sf) __W,
1337 (__mmask8) __U);
1340 extern __inline __m128
1341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
1344 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1345 (__v4sf)
1346 _mm_setzero_ps (),
1347 (__mmask8) __U);
1350 extern __inline __m256
1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm256_cvtepu32_ps (__m256i __A)
1354 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1355 (__v8sf)
1356 _mm256_setzero_ps (),
1357 (__mmask8) -1);
1360 extern __inline __m256
1361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1364 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1365 (__v8sf) __W,
1366 (__mmask8) __U);
1369 extern __inline __m256
1370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1373 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1374 (__v8sf)
1375 _mm256_setzero_ps (),
1376 (__mmask8) __U);
1379 extern __inline __m128
1380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381 _mm_cvtepu32_ps (__m128i __A)
1383 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1384 (__v4sf)
1385 _mm_setzero_ps (),
1386 (__mmask8) -1);
1389 extern __inline __m128
1390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1393 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1394 (__v4sf) __W,
1395 (__mmask8) __U);
1398 extern __inline __m128
1399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1400 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1402 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1403 (__v4sf)
1404 _mm_setzero_ps (),
1405 (__mmask8) __U);
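/* Usage sketch (illustrative): cvtepu32_ps treats each 32-bit lane as
   unsigned, which matters once the top bit is set.  The value below is
   an example only.

     __m256i u = _mm256_set1_epi32 ((int) 0x80000000u);
     __m256 a = _mm256_cvtepu32_ps (u);   // 2147483648.0f per lane
     __m256 b = _mm256_cvtepi32_ps (u);   // -2147483648.0f per lane
*/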
1408 extern __inline __m256d
1409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1412 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1413 (__v4df) __W,
1414 (__mmask8) __U);
1417 extern __inline __m256d
1418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1421 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1422 (__v4df)
1423 _mm256_setzero_pd (),
1424 (__mmask8) __U);
1427 extern __inline __m128d
1428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1431 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1432 (__v2df) __W,
1433 (__mmask8) __U);
1436 extern __inline __m128d
1437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1440 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1441 (__v2df)
1442 _mm_setzero_pd (),
1443 (__mmask8) __U);
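/* Usage sketch (illustrative): the masked cvtps_pd forms widen the low
   four floats of __A to doubles; the mask selects which result lanes
   are produced.  Inputs and mask are examples only.

     __m128 f = _mm_set_ps (4.f, 3.f, 2.f, 1.f);       // elements {1, 2, 3, 4}
     __m256d d = _mm256_maskz_cvtps_pd (0x3, f);       // {1.0, 2.0, 0.0, 0.0}
*/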
1446 extern __inline __m128i
1447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448 _mm_cvtepi32_epi8 (__m128i __A)
1450 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1451 (__v16qi)
1452 _mm_undefined_si128 (),
1453 (__mmask8) -1);
1456 extern __inline void
1457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1458 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1460 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1463 extern __inline __m128i
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi) __O, __M);
1471 extern __inline __m128i
1472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1475 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1476 (__v16qi)
1477 _mm_setzero_si128 (),
1478 __M);
1481 extern __inline __m128i
1482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483 _mm256_cvtepi32_epi8 (__m256i __A)
1485 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1486 (__v16qi)
1487 _mm_undefined_si128 (),
1488 (__mmask8) -1);
1491 extern __inline __m128i
1492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1495 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1496 (__v16qi) __O, __M);
1499 extern __inline void
1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1503 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1506 extern __inline __m128i
1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi)
1512 _mm_setzero_si128 (),
1513 __M);
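/* Usage sketch (illustrative): the pmov down-conversions truncate each
   32-bit lane to 8 bits, packing the results into the low bytes of the
   xmm destination; the storeu variants write only the converted bytes.
   The buffer below is an assumption for the example.

     __m256i v = _mm256_set1_epi32 (0x17F);            // low byte 0x7F
     unsigned char out[8];
     _mm256_mask_cvtepi32_storeu_epi8 (out, 0xFF, v);  // out[0..7] = 0x7F
*/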
1516 extern __inline __m128i
1517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1518 _mm_cvtsepi32_epi8 (__m128i __A)
1520 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1521 (__v16qi)
1522 _mm_undefined_si128 (),
1523 (__mmask8) -1);
1526 extern __inline void
1527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1530 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1533 extern __inline __m128i
1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1537 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1538 (__v16qi) __O, __M);
1541 extern __inline __m128i
1542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1545 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1546 (__v16qi)
1547 _mm_setzero_si128 (),
1548 __M);
1551 extern __inline __m128i
1552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553 _mm256_cvtsepi32_epi8 (__m256i __A)
1555 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1556 (__v16qi)
1557 _mm_undefined_si128 (),
1558 (__mmask8) -1);
1561 extern __inline void
1562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1565 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1568 extern __inline __m128i
1569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1572 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1573 (__v16qi) __O, __M);
1576 extern __inline __m128i
1577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1578 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1580 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1581 (__v16qi)
1582 _mm_setzero_si128 (),
1583 __M);
1586 extern __inline __m128i
1587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1588 _mm_cvtusepi32_epi8 (__m128i __A)
1590 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1591 (__v16qi)
1592 _mm_undefined_si128 (),
1593 (__mmask8) -1);
1596 extern __inline void
1597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1600 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1603 extern __inline __m128i
1604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1605 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1607 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1608 (__v16qi) __O,
1609 __M);
1612 extern __inline __m128i
1613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1616 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1617 (__v16qi)
1618 _mm_setzero_si128 (),
1619 __M);
1622 extern __inline __m128i
1623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624 _mm256_cvtusepi32_epi8 (__m256i __A)
1626 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1627 (__v16qi)
1628 _mm_undefined_si128 (),
1629 (__mmask8) -1);
1632 extern __inline void
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1636 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1639 extern __inline __m128i
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1643 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1644 (__v16qi) __O,
1645 __M);
1648 extern __inline __m128i
1649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1652 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1653 (__v16qi)
1654 _mm_setzero_si128 (),
1655 __M);
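/* Usage sketch (illustrative): cvtsepi32_epi8 saturates each lane to the
   signed byte range [-128, 127], while cvtusepi32_epi8 treats the lanes
   as unsigned and saturates to [0, 255].  Values are examples only
   (element order low to high).

     __m128i v = _mm_set_epi32 (300, -5, 200, 100);    // elements {100, 200, -5, 300}
     __m128i s = _mm_cvtsepi32_epi8 (v);    // low bytes {100, 127, -5, 127}
     __m128i u = _mm_cvtusepi32_epi8 (v);   // low bytes {100, 200, 255, 255}
*/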
1658 extern __inline __m128i
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 _mm_cvtepi32_epi16 (__m128i __A)
1662 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1663 (__v8hi)
1664 _mm_setzero_si128 (),
1665 (__mmask8) -1);
1668 extern __inline void
1669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1672 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1675 extern __inline __m128i
1676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1679 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1680 (__v8hi) __O, __M);
1683 extern __inline __m128i
1684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1687 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1688 (__v8hi)
1689 _mm_setzero_si128 (),
1690 __M);
1693 extern __inline __m128i
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm256_cvtepi32_epi16 (__m256i __A)
1697 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1698 (__v8hi)
1699 _mm_setzero_si128 (),
1700 (__mmask8) -1);
1703 extern __inline void
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1707 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1710 extern __inline __m128i
1711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1712 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1714 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1715 (__v8hi) __O, __M);
1718 extern __inline __m128i
1719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1722 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1723 (__v8hi)
1724 _mm_setzero_si128 (),
1725 __M);
1728 extern __inline __m128i
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm_cvtsepi32_epi16 (__m128i __A)
1732 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1733 (__v8hi)
1734 _mm_setzero_si128 (),
1735 (__mmask8) -1);
1738 extern __inline void
1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1742 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1745 extern __inline __m128i
1746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1749 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1750 (__v8hi)__O,
1751 __M);
1754 extern __inline __m128i
1755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1758 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1759 (__v8hi)
1760 _mm_setzero_si128 (),
1761 __M);
1764 extern __inline __m128i
1765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766 _mm256_cvtsepi32_epi16 (__m256i __A)
1768 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1769 (__v8hi)
1770 _mm_undefined_si128 (),
1771 (__mmask8) -1);
1774 extern __inline void
1775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1778 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1781 extern __inline __m128i
1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1785 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1786 (__v8hi) __O, __M);
1789 extern __inline __m128i
1790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1793 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1794 (__v8hi)
1795 _mm_setzero_si128 (),
1796 __M);
1799 extern __inline __m128i
1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 _mm_cvtusepi32_epi16 (__m128i __A)
1803 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1804 (__v8hi)
1805 _mm_undefined_si128 (),
1806 (__mmask8) -1);
1809 extern __inline void
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1813 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1816 extern __inline __m128i
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1820 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1821 (__v8hi) __O, __M);
1824 extern __inline __m128i
1825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1826 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1828 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1829 (__v8hi)
1830 _mm_setzero_si128 (),
1831 __M);
1834 extern __inline __m128i
1835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836 _mm256_cvtusepi32_epi16 (__m256i __A)
1838 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1839 (__v8hi)
1840 _mm_undefined_si128 (),
1841 (__mmask8) -1);
1844 extern __inline void
1845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1848 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1851 extern __inline __m128i
1852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1855 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1856 (__v8hi) __O, __M);
1859 extern __inline __m128i
1860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1863 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1864 (__v8hi)
1865 _mm_setzero_si128 (),
1866 __M);
1869 extern __inline __m128i
1870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1871 _mm_cvtepi64_epi8 (__m128i __A)
1873 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1874 (__v16qi)
1875 _mm_undefined_si128 (),
1876 (__mmask8) -1);
1879 extern __inline void
1880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1883 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1886 extern __inline __m128i
1887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1890 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1891 (__v16qi) __O, __M);
1894 extern __inline __m128i
1895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1898 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1899 (__v16qi)
1900 _mm_setzero_si128 (),
1901 __M);
1904 extern __inline __m128i
1905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906 _mm256_cvtepi64_epi8 (__m256i __A)
1908 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1909 (__v16qi)
1910 _mm_undefined_si128 (),
1911 (__mmask8) -1);
1914 extern __inline void
1915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1918 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1921 extern __inline __m128i
1922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1925 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1926 (__v16qi) __O, __M);
1929 extern __inline __m128i
1930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1933 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1934 (__v16qi)
1935 _mm_setzero_si128 (),
1936 __M);
1939 extern __inline __m128i
1940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 _mm_cvtsepi64_epi8 (__m128i __A)
1943 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1944 (__v16qi)
1945 _mm_undefined_si128 (),
1946 (__mmask8) -1);
1949 extern __inline void
1950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1953 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1956 extern __inline __m128i
1957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1960 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1961 (__v16qi) __O, __M);
1964 extern __inline __m128i
1965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1968 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1969 (__v16qi)
1970 _mm_setzero_si128 (),
1971 __M);
1974 extern __inline __m128i
1975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976 _mm256_cvtsepi64_epi8 (__m256i __A)
1978 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1979 (__v16qi)
1980 _mm_undefined_si128 (),
1981 (__mmask8) -1);
1984 extern __inline void
1985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1988 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1991 extern __inline __m128i
1992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1995 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1996 (__v16qi) __O, __M);
1999 extern __inline __m128i
2000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2003 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2004 (__v16qi)
2005 _mm_setzero_si128 (),
2006 __M);
2009 extern __inline __m128i
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm_cvtusepi64_epi8 (__m128i __A)
2013 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2014 (__v16qi)
2015 _mm_undefined_si128 (),
2016 (__mmask8) -1);
2019 extern __inline void
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2026 extern __inline __m128i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2035 extern __inline __m128i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2045 extern __inline __m128i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm256_cvtusepi64_epi8 (__m256i __A)
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050 (__v16qi)
2051 _mm_undefined_si128 (),
2052 (__mmask8) -1);
2055 extern __inline void
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2059 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2062 extern __inline __m128i
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2066 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2067 (__v16qi) __O,
2068 __M);
2071 extern __inline __m128i
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2075 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2076 (__v16qi)
2077 _mm_setzero_si128 (),
2078 __M);
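/* The three families above narrow packed 64-bit integers to 8-bit
   integers: cvtepi64_epi8 truncates, cvtsepi64_epi8 saturates as
   signed and cvtusepi64_epi8 saturates as unsigned; only the low 2
   (128-bit source) or 4 (256-bit source) bytes of the result are
   significant.  Sketch with a hypothetical value: given
     __m256i v = _mm256_set1_epi64x (300);
   _mm256_cvtepi64_epi8 (v) yields bytes of 44 (300 truncated to
   8 bits), while _mm256_cvtsepi64_epi8 (v) yields bytes of 127.  */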
2081 extern __inline __m128i
2082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2083 _mm_cvtepi64_epi16 (__m128i __A)
2085 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2086 (__v8hi)
2087 _mm_undefined_si128 (),
2088 (__mmask8) -1);
2091 extern __inline void
2092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2095 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2098 extern __inline __m128i
2099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2100 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2102 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2103 (__v8hi) __O,
2103 (__v8hi) __O,
2104 __M);
2107 extern __inline __m128i
2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2111 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2112 (__v8hi)
2113 _mm_setzero_si128 (),
2114 __M);
2117 extern __inline __m128i
2118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119 _mm256_cvtepi64_epi16 (__m256i __A)
2121 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2122 (__v8hi)
2123 _mm_undefined_si128 (),
2124 (__mmask8) -1);
2127 extern __inline void
2128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2131 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2134 extern __inline __m128i
2135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2136 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2138 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2139 (__v8hi) __O, __M);
2142 extern __inline __m128i
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2146 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2147 (__v8hi)
2148 _mm_setzero_si128 (),
2149 __M);
2152 extern __inline __m128i
2153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154 _mm_cvtsepi64_epi16 (__m128i __A)
2156 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2157 (__v8hi)
2158 _mm_undefined_si128 (),
2159 (__mmask8) -1);
2162 extern __inline void
2163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2166 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2169 extern __inline __m128i
2170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2173 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2174 (__v8hi) __O, __M);
2177 extern __inline __m128i
2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2181 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2182 (__v8hi)
2183 _mm_setzero_si128 (),
2184 __M);
2187 extern __inline __m128i
2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 _mm256_cvtsepi64_epi16 (__m256i __A)
2191 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2192 (__v8hi)
2193 _mm_undefined_si128 (),
2194 (__mmask8) -1);
2197 extern __inline void
2198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2201 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2204 extern __inline __m128i
2205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2208 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2209 (__v8hi) __O, __M);
2212 extern __inline __m128i
2213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2216 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2217 (__v8hi)
2218 _mm_setzero_si128 (),
2219 __M);
2222 extern __inline __m128i
2223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224 _mm_cvtusepi64_epi16 (__m128i __A)
2226 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2227 (__v8hi)
2228 _mm_undefined_si128 (),
2229 (__mmask8) -1);
2232 extern __inline void
2233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2236 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2239 extern __inline __m128i
2240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2243 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2244 (__v8hi) __O, __M);
2247 extern __inline __m128i
2248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2251 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2252 (__v8hi)
2253 _mm_setzero_si128 (),
2254 __M);
2257 extern __inline __m128i
2258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2259 _mm256_cvtusepi64_epi16 (__m256i __A)
2261 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2262 (__v8hi)
2263 _mm_undefined_si128 (),
2264 (__mmask8) -1);
2267 extern __inline void
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2271 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2274 extern __inline __m128i
2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2278 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2279 (__v8hi) __O, __M);
2282 extern __inline __m128i
2283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2286 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2287 (__v8hi)
2288 _mm_setzero_si128 (),
2289 __M);
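/* Likewise, the families above narrow packed 64-bit integers to
   16-bit integers in truncating, signed-saturating and
   unsigned-saturating variants.  For some __m256i v,
     _mm256_maskz_cvtepi64_epi16 (0x5, v)
   converts only elements 0 and 2 and zeroes the other result words.  */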
2292 extern __inline __m128i
2293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294 _mm_cvtepi64_epi32 (__m128i __A)
2296 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2297 (__v4si)
2298 _mm_undefined_si128 (),
2299 (__mmask8) -1);
2302 extern __inline void
2303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2306 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2309 extern __inline __m128i
2310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si) __O, __M);
2317 extern __inline __m128i
2318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2321 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2322 (__v4si)
2323 _mm_setzero_si128 (),
2324 __M);
2327 extern __inline __m128i
2328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329 _mm256_cvtepi64_epi32 (__m256i __A)
2331 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2332 (__v4si)
2333 _mm_undefined_si128 (),
2334 (__mmask8) -1);
2337 extern __inline void
2338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2341 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2344 extern __inline __m128i
2345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2348 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2349 (__v4si) __O, __M);
2352 extern __inline __m128i
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2356 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2357 (__v4si)
2358 _mm_setzero_si128 (),
2359 __M);
2362 extern __inline __m128i
2363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364 _mm_cvtsepi64_epi32 (__m128i __A)
2366 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2367 (__v4si)
2368 _mm_undefined_si128 (),
2369 (__mmask8) -1);
2372 extern __inline void
2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2376 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2379 extern __inline __m128i
2380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2383 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2384 (__v4si) __O, __M);
2387 extern __inline __m128i
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2391 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2392 (__v4si)
2393 _mm_setzero_si128 (),
2394 __M);
2397 extern __inline __m128i
2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 _mm256_cvtsepi64_epi32 (__m256i __A)
2401 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2402 (__v4si)
2403 _mm_undefined_si128 (),
2404 (__mmask8) -1);
2407 extern __inline void
2408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2411 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2414 extern __inline __m128i
2415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2418 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2419 (__v4si) __O,
2420 __M);
2423 extern __inline __m128i
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2427 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2428 (__v4si)
2429 _mm_setzero_si128 (),
2430 __M);
2433 extern __inline __m128i
2434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2435 _mm_cvtusepi64_epi32 (__m128i __A)
2437 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2438 (__v4si)
2439 _mm_undefined_si128 (),
2440 (__mmask8) -1);
2443 extern __inline void
2444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2447 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2450 extern __inline __m128i
2451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2454 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2455 (__v4si) __O, __M);
2458 extern __inline __m128i
2459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2462 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2463 (__v4si)
2464 _mm_setzero_si128 (),
2465 __M);
2468 extern __inline __m128i
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 _mm256_cvtusepi64_epi32 (__m256i __A)
2472 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2473 (__v4si)
2474 _mm_undefined_si128 (),
2475 (__mmask8) -1);
2478 extern __inline void
2479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2482 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2485 extern __inline __m128i
2486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2489 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2490 (__v4si) __O, __M);
2493 extern __inline __m128i
2494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2497 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2498 (__v4si)
2499 _mm_setzero_si128 (),
2500 __M);
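/* And the families above narrow packed 64-bit integers to 32-bit
   integers with the same three behaviours.  Sketch contrasting
   truncation with unsigned saturation (hypothetical value): given
     __m256i v = _mm256_set1_epi64x (0x100000005LL);
   _mm256_cvtepi64_epi32 (v) produces lanes of 5 (low 32 bits kept),
   whereas _mm256_cvtusepi64_epi32 (v) produces lanes of 0xFFFFFFFF.  */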
2503 extern __inline __m256
2504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2505 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2507 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2508 (__v8sf) __O,
2509 __M);
2512 extern __inline __m256
2513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2516 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2517 (__v8sf)
2518 _mm256_setzero_ps (),
2519 __M);
2522 extern __inline __m128
2523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2526 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2527 (__v4sf) __O,
2528 __M);
2531 extern __inline __m128
2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2535 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2536 (__v4sf)
2537 _mm_setzero_ps (),
2538 __M);
2541 extern __inline __m256d
2542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2543 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2545 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2546 (__v4df) __O,
2547 __M);
2550 extern __inline __m256d
2551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2554 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2555 (__v4df)
2556 _mm256_setzero_pd (),
2557 __M);
2560 extern __inline __m256i
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2564 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2565 (__v8si) __O,
2566 __M);
2569 extern __inline __m256i
2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2573 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2574 (__v8si)
2575 _mm256_setzero_si256 (),
2576 __M);
2579 extern __inline __m256i
2580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2583 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2584 __M);
2587 extern __inline __m256i
2588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2589 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2591 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2592 (__v8si)
2593 _mm256_setzero_si256 (),
2594 __M);
2597 extern __inline __m128i
2598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2601 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2602 (__v4si) __O,
2603 __M);
2606 extern __inline __m128i
2607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2610 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2611 (__v4si)
2612 _mm_setzero_si128 (),
2613 __M);
2616 extern __inline __m128i
2617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2620 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2621 __M);
2624 extern __inline __m128i
2625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2626 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2628 return (__m128i)
2629 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2630 (__v4si) _mm_setzero_si128 (),
2631 __M);
2634 extern __inline __m256i
2635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2638 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2639 (__v4di) __O,
2640 __M);
2643 extern __inline __m256i
2644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2645 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2647 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2648 (__v4di)
2649 _mm256_setzero_si256 (),
2650 __M);
2653 extern __inline __m256i
2654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2657 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2658 __M);
2661 extern __inline __m256i
2662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2665 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2666 (__v4di)
2667 _mm256_setzero_si256 (),
2668 __M);
2671 extern __inline __m128i
2672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2675 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2676 (__v2di) __O,
2677 __M);
2680 extern __inline __m128i
2681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2682 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2684 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2685 (__v2di)
2686 _mm_setzero_si128 (),
2687 __M);
2690 extern __inline __m128i
2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2694 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2695 __M);
2698 extern __inline __m128i
2699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2700 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2702 return (__m128i)
2703 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2704 (__v2di) _mm_setzero_si128 (),
2705 __M);
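/* The block above provides masked element broadcasts:
   broadcastss_ps/broadcastsd_pd and broadcastd_epi32/broadcastq_epi64
   replicate the lowest element of a vector source, while the
   mask_/maskz_set1 forms broadcast an int or long long straight from
   a general-purpose register.  Unselected lanes keep the old value
   (mask) or become zero (maskz).  For example (hypothetical mask),
     _mm256_maskz_set1_epi32 (0x0f, 7)
   places 7 in lanes 0-3 and zero in lanes 4-7.  */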
2708 extern __inline __m256
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm256_broadcast_f32x4 (__m128 __A)
2712 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2713 (__v8sf) _mm256_undefined_ps (),
2714 (__mmask8) -1);
2717 extern __inline __m256
2718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2721 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2722 (__v8sf) __O,
2723 __M);
2726 extern __inline __m256
2727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2730 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2731 (__v8sf)
2732 _mm256_setzero_ps (),
2733 __M);
2736 extern __inline __m256i
2737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738 _mm256_broadcast_i32x4 (__m128i __A)
2740 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2741 __A,
2742 (__v8si) _mm256_undefined_si256 (),
2743 (__mmask8) -1);
2746 extern __inline __m256i
2747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2750 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2751 __A,
2752 (__v8si)
2753 __O, __M);
2756 extern __inline __m256i
2757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2760 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2761 __A,
2762 (__v8si)
2763 _mm256_setzero_si256 (),
2764 __M);
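/* _mm256_broadcast_f32x4 and _mm256_broadcast_i32x4 above replicate a
   whole 128-bit value into both halves of a 256-bit result, with
   optional masking at 32-bit granularity.  Sketch: for
     __m128 lo = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
   _mm256_broadcast_f32x4 (lo) yields { 1, 2, 3, 4, 1, 2, 3, 4 }.  */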
2767 extern __inline __m256i
2768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2771 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2772 (__v8si) __W,
2773 (__mmask8) __U);
2776 extern __inline __m256i
2777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2778 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2780 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2781 (__v8si)
2782 _mm256_setzero_si256 (),
2783 (__mmask8) __U);
2786 extern __inline __m128i
2787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2790 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2791 (__v4si) __W,
2792 (__mmask8) __U);
2795 extern __inline __m128i
2796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2799 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2800 (__v4si)
2801 _mm_setzero_si128 (),
2802 (__mmask8) __U);
2805 extern __inline __m256i
2806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2809 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2810 (__v4di) __W,
2811 (__mmask8) __U);
2814 extern __inline __m256i
2815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2818 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2819 (__v4di)
2820 _mm256_setzero_si256 (),
2821 (__mmask8) __U);
2824 extern __inline __m128i
2825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2826 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2828 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2829 (__v2di) __W,
2830 (__mmask8) __U);
2833 extern __inline __m128i
2834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2837 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2838 (__v2di)
2839 _mm_setzero_si128 (),
2840 (__mmask8) __U);
2843 extern __inline __m256i
2844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2847 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2848 (__v8si) __W,
2849 (__mmask8) __U);
2852 extern __inline __m256i
2853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2856 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2857 (__v8si)
2858 _mm256_setzero_si256 (),
2859 (__mmask8) __U);
2862 extern __inline __m128i
2863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2866 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2867 (__v4si) __W,
2868 (__mmask8) __U);
2871 extern __inline __m128i
2872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2875 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2876 (__v4si)
2877 _mm_setzero_si128 (),
2878 (__mmask8) __U);
2881 extern __inline __m256i
2882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2885 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2886 (__v4di) __W,
2887 (__mmask8) __U);
2890 extern __inline __m256i
2891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2894 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2895 (__v4di)
2896 _mm256_setzero_si256 (),
2897 (__mmask8) __U);
2900 extern __inline __m128i
2901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2904 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2905 (__v2di) __W,
2906 (__mmask8) __U);
2909 extern __inline __m128i
2910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2913 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2914 (__v2di)
2915 _mm_setzero_si128 (),
2916 (__mmask8) __U);
2919 extern __inline __m256i
2920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2921 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2923 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2924 (__v4di) __W,
2925 (__mmask8) __U);
2928 extern __inline __m256i
2929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2932 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2933 (__v4di)
2934 _mm256_setzero_si256 (),
2935 (__mmask8) __U);
2938 extern __inline __m128i
2939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2942 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2943 (__v2di) __W,
2944 (__mmask8) __U);
2947 extern __inline __m128i
2948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2951 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2952 (__v2di)
2953 _mm_setzero_si128 (),
2954 (__mmask8) __U);
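/* The pmovsx block above gives masked sign extension from the low
   elements of the source: bytes to dwords/qwords, words to
   dwords/qwords and dwords to qwords.  Sketch: with
     __m128i b = _mm_set1_epi8 (-1);
   _mm256_maskz_cvtepi8_epi32 (0xff, b) yields eight lanes of -1.  */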
2957 extern __inline __m256i
2958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2961 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2962 (__v8si) __W,
2963 (__mmask8) __U);
2966 extern __inline __m256i
2967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2970 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2971 (__v8si)
2972 _mm256_setzero_si256 (),
2973 (__mmask8) __U);
2976 extern __inline __m128i
2977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2978 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2980 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2981 (__v4si) __W,
2982 (__mmask8) __U);
2985 extern __inline __m128i
2986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2989 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2990 (__v4si)
2991 _mm_setzero_si128 (),
2992 (__mmask8) __U);
2995 extern __inline __m256i
2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2999 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3000 (__v4di) __W,
3001 (__mmask8) __U);
3004 extern __inline __m256i
3005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3008 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3009 (__v4di)
3010 _mm256_setzero_si256 (),
3011 (__mmask8) __U);
3014 extern __inline __m128i
3015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3018 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3019 (__v2di) __W,
3020 (__mmask8) __U);
3023 extern __inline __m128i
3024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3027 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3028 (__v2di)
3029 _mm_setzero_si128 (),
3030 (__mmask8) __U);
3033 extern __inline __m256i
3034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3037 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3038 (__v8si) __W,
3039 (__mmask8) __U);
3042 extern __inline __m256i
3043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3046 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3047 (__v8si)
3048 _mm256_setzero_si256 (),
3049 (__mmask8) __U);
3052 extern __inline __m128i
3053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3054 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3056 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3057 (__v4si) __W,
3058 (__mmask8) __U);
3061 extern __inline __m128i
3062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3065 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3066 (__v4si)
3067 _mm_setzero_si128 (),
3068 (__mmask8) __U);
3071 extern __inline __m256i
3072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3073 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3075 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3076 (__v4di) __W,
3077 (__mmask8) __U);
3080 extern __inline __m256i
3081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3084 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3085 (__v4di)
3086 _mm256_setzero_si256 (),
3087 (__mmask8) __U);
3090 extern __inline __m128i
3091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3094 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3095 (__v2di) __W,
3096 (__mmask8) __U);
3099 extern __inline __m128i
3100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3103 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3104 (__v2di)
3105 _mm_setzero_si128 (),
3106 (__mmask8) __U);
3109 extern __inline __m256i
3110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3111 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3113 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3114 (__v4di) __W,
3115 (__mmask8) __U);
3118 extern __inline __m256i
3119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3122 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3123 (__v4di)
3124 _mm256_setzero_si256 (),
3125 (__mmask8) __U);
3128 extern __inline __m128i
3129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3132 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3133 (__v2di) __W,
3134 (__mmask8) __U);
3137 extern __inline __m128i
3138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3141 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3142 (__v2di)
3143 _mm_setzero_si128 (),
3144 (__mmask8) __U);
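/* The pmovzx block above is the zero-extending counterpart: the same
   widenings, but the source elements are treated as unsigned, so the
   previous example rewritten as _mm256_maskz_cvtepu8_epi32 (0xff, b)
   would yield eight lanes of 255 instead of -1.  */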
3147 extern __inline __m256d
3148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149 _mm256_rcp14_pd (__m256d __A)
3151 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3152 (__v4df)
3153 _mm256_setzero_pd (),
3154 (__mmask8) -1);
3157 extern __inline __m256d
3158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3161 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3162 (__v4df) __W,
3163 (__mmask8) __U);
3166 extern __inline __m256d
3167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3170 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3171 (__v4df)
3172 _mm256_setzero_pd (),
3173 (__mmask8) __U);
3176 extern __inline __m128d
3177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178 _mm_rcp14_pd (__m128d __A)
3180 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3181 (__v2df)
3182 _mm_setzero_pd (),
3183 (__mmask8) -1);
3186 extern __inline __m128d
3187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3190 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3191 (__v2df) __W,
3192 (__mmask8) __U);
3195 extern __inline __m128d
3196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3199 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3200 (__v2df)
3201 _mm_setzero_pd (),
3202 (__mmask8) __U);
3205 extern __inline __m256
3206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207 _mm256_rcp14_ps (__m256 __A)
3209 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3210 (__v8sf)
3211 _mm256_setzero_ps (),
3212 (__mmask8) -1);
3215 extern __inline __m256
3216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3219 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3220 (__v8sf) __W,
3221 (__mmask8) __U);
3224 extern __inline __m256
3225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3228 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3229 (__v8sf)
3230 _mm256_setzero_ps (),
3231 (__mmask8) __U);
3234 extern __inline __m128
3235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3236 _mm_rcp14_ps (__m128 __A)
3238 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3239 (__v4sf)
3240 _mm_setzero_ps (),
3241 (__mmask8) -1);
3244 extern __inline __m128
3245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3248 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3249 (__v4sf) __W,
3250 (__mmask8) __U);
3253 extern __inline __m128
3254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3257 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3258 (__v4sf)
3259 _mm_setzero_ps (),
3260 (__mmask8) __U);
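/* rcp14_pd/ps above compute an approximate reciprocal of each element
   with a relative error of at most 2^-14 (VRCP14PD/VRCP14PS).
   Sketch: _mm256_rcp14_ps (_mm256_set1_ps (4.0f)) returns lanes close
   to 0.25f.  */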
3263 extern __inline __m256d
3264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3265 _mm256_rsqrt14_pd (__m256d __A)
3267 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3268 (__v4df)
3269 _mm256_setzero_pd (),
3270 (__mmask8) -1);
3273 extern __inline __m256d
3274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3277 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3278 (__v4df) __W,
3279 (__mmask8) __U);
3282 extern __inline __m256d
3283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3284 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3286 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3287 (__v4df)
3288 _mm256_setzero_pd (),
3289 (__mmask8) __U);
3292 extern __inline __m128d
3293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3294 _mm_rsqrt14_pd (__m128d __A)
3296 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3297 (__v2df)
3298 _mm_setzero_pd (),
3299 (__mmask8) -1);
3302 extern __inline __m128d
3303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3306 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3307 (__v2df) __W,
3308 (__mmask8) __U);
3311 extern __inline __m128d
3312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3315 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3316 (__v2df)
3317 _mm_setzero_pd (),
3318 (__mmask8) __U);
3321 extern __inline __m256
3322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323 _mm256_rsqrt14_ps (__m256 __A)
3325 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3326 (__v8sf)
3327 _mm256_setzero_ps (),
3328 (__mmask8) -1);
3331 extern __inline __m256
3332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3335 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3336 (__v8sf) __W,
3337 (__mmask8) __U);
3340 extern __inline __m256
3341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3344 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3345 (__v8sf)
3346 _mm256_setzero_ps (),
3347 (__mmask8) __U);
3350 extern __inline __m128
3351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352 _mm_rsqrt14_ps (__m128 __A)
3354 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3355 (__v4sf)
3356 _mm_setzero_ps (),
3357 (__mmask8) -1);
3360 extern __inline __m128
3361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3362 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3364 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3365 (__v4sf) __W,
3366 (__mmask8) __U);
3369 extern __inline __m128
3370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3373 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3374 (__v4sf)
3375 _mm_setzero_ps (),
3376 (__mmask8) __U);
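/* rsqrt14_pd/ps above compute an approximate reciprocal square root,
   again with relative error bounded by 2^-14; e.g.
   _mm256_rsqrt14_ps (_mm256_set1_ps (4.0f)) returns lanes close to
   0.5f.  */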
3379 extern __inline __m256d
3380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3383 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3384 (__v4df) __W,
3385 (__mmask8) __U);
3388 extern __inline __m256d
3389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3392 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3393 (__v4df)
3394 _mm256_setzero_pd (),
3395 (__mmask8) __U);
3398 extern __inline __m128d
3399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3402 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3403 (__v2df) __W,
3404 (__mmask8) __U);
3407 extern __inline __m128d
3408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3411 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3412 (__v2df)
3413 _mm_setzero_pd (),
3414 (__mmask8) __U);
3417 extern __inline __m256
3418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3419 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3421 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3422 (__v8sf) __W,
3423 (__mmask8) __U);
3426 extern __inline __m256
3427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3430 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3431 (__v8sf)
3432 _mm256_setzero_ps (),
3433 (__mmask8) __U);
3436 extern __inline __m128
3437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3440 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3441 (__v4sf) __W,
3442 (__mmask8) __U);
3445 extern __inline __m128
3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3449 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3450 (__v4sf)
3451 _mm_setzero_ps (),
3452 (__mmask8) __U);
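/* The masked sqrt forms above wrap VSQRTPD/VSQRTPS: selected lanes
   receive the square root, unselected lanes keep __W (mask) or become
   zero (maskz).  For instance _mm_maskz_sqrt_pd (0x1, _mm_set1_pd (9.0))
   yields { 3.0, 0.0 }.  */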
3455 extern __inline __m256i
3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3458 __m256i __B)
3460 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3461 (__v8si) __B,
3462 (__v8si) __W,
3463 (__mmask8) __U);
3466 extern __inline __m256i
3467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3470 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3471 (__v8si) __B,
3472 (__v8si)
3473 _mm256_setzero_si256 (),
3474 (__mmask8) __U);
3477 extern __inline __m256i
3478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3480 __m256i __B)
3482 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3483 (__v4di) __B,
3484 (__v4di) __W,
3485 (__mmask8) __U);
3488 extern __inline __m256i
3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3492 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3493 (__v4di) __B,
3494 (__v4di)
3495 _mm256_setzero_si256 (),
3496 (__mmask8) __U);
3499 extern __inline __m256i
3500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3502 __m256i __B)
3504 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3505 (__v8si) __B,
3506 (__v8si) __W,
3507 (__mmask8) __U);
3510 extern __inline __m256i
3511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3514 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3515 (__v8si) __B,
3516 (__v8si)
3517 _mm256_setzero_si256 (),
3518 (__mmask8) __U);
3521 extern __inline __m256i
3522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3524 __m256i __B)
3526 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3527 (__v4di) __B,
3528 (__v4di) __W,
3529 (__mmask8) __U);
3532 extern __inline __m256i
3533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3536 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3537 (__v4di) __B,
3538 (__v4di)
3539 _mm256_setzero_si256 (),
3540 (__mmask8) __U);
3543 extern __inline __m128i
3544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3546 __m128i __B)
3548 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3549 (__v4si) __B,
3550 (__v4si) __W,
3551 (__mmask8) __U);
3554 extern __inline __m128i
3555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3558 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3559 (__v4si) __B,
3560 (__v4si)
3561 _mm_setzero_si128 (),
3562 (__mmask8) __U);
3565 extern __inline __m128i
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3568 __m128i __B)
3570 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3571 (__v2di) __B,
3572 (__v2di) __W,
3573 (__mmask8) __U);
3576 extern __inline __m128i
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3580 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3581 (__v2di) __B,
3582 (__v2di)
3583 _mm_setzero_si128 (),
3584 (__mmask8) __U);
3587 extern __inline __m128i
3588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3590 __m128i __B)
3592 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3593 (__v4si) __B,
3594 (__v4si) __W,
3595 (__mmask8) __U);
3598 extern __inline __m128i
3599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3602 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3603 (__v4si) __B,
3604 (__v4si)
3605 _mm_setzero_si128 (),
3606 (__mmask8) __U);
3609 extern __inline __m128i
3610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3612 __m128i __B)
3614 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3615 (__v2di) __B,
3616 (__v2di) __W,
3617 (__mmask8) __U);
3620 extern __inline __m128i
3621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3624 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3625 (__v2di) __B,
3626 (__v2di)
3627 _mm_setzero_si128 (),
3628 (__mmask8) __U);
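/* The block above provides masked 32- and 64-bit integer addition and
   subtraction.  Sketch with hypothetical operands: given
     __m256i a = _mm256_set1_epi32 (10);
     __m256i b = _mm256_set1_epi32 (3);
   _mm256_mask_add_epi32 (a, 0x0f, a, b) yields 13 in lanes 0-3 and
   leaves the pass-through value 10 in lanes 4-7.  */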
3631 extern __inline __m256
3632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3633 _mm256_getexp_ps (__m256 __A)
3635 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3636 (__v8sf)
3637 _mm256_setzero_ps (),
3638 (__mmask8) -1);
3641 extern __inline __m256
3642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3645 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3646 (__v8sf) __W,
3647 (__mmask8) __U);
3650 extern __inline __m256
3651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3654 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3655 (__v8sf)
3656 _mm256_setzero_ps (),
3657 (__mmask8) __U);
3660 extern __inline __m256d
3661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662 _mm256_getexp_pd (__m256d __A)
3664 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3665 (__v4df)
3666 _mm256_setzero_pd (),
3667 (__mmask8) -1);
3670 extern __inline __m256d
3671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3674 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3675 (__v4df) __W,
3676 (__mmask8) __U);
3679 extern __inline __m256d
3680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3683 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3684 (__v4df)
3685 _mm256_setzero_pd (),
3686 (__mmask8) __U);
3689 extern __inline __m128
3690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3691 _mm_getexp_ps (__m128 __A)
3693 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3694 (__v4sf)
3695 _mm_setzero_ps (),
3696 (__mmask8) -1);
3699 extern __inline __m128
3700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3703 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3704 (__v4sf) __W,
3705 (__mmask8) __U);
3708 extern __inline __m128
3709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3712 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3713 (__v4sf)
3714 _mm_setzero_ps (),
3715 (__mmask8) __U);
3718 extern __inline __m128d
3719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3720 _mm_getexp_pd (__m128d __A)
3722 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3723 (__v2df)
3724 _mm_setzero_pd (),
3725 (__mmask8) -1);
3728 extern __inline __m128d
3729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3730 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3732 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3733 (__v2df) __W,
3734 (__mmask8) __U);
3737 extern __inline __m128d
3738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3739 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3741 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3742 (__v2df)
3743 _mm_setzero_pd (),
3744 (__mmask8) __U);
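/* getexp_ps/pd above extract the exponent of each element as a
   floating-point value, essentially floor (log2 (fabs (x))); e.g.
   _mm_getexp_ps (_mm_set1_ps (8.0f)) yields lanes of 3.0f.  */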
3747 extern __inline __m256i
3748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3750 __m128i __B)
3752 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3753 (__v4si) __B,
3754 (__v8si) __W,
3755 (__mmask8) __U);
3758 extern __inline __m256i
3759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3762 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3763 (__v4si) __B,
3764 (__v8si)
3765 _mm256_setzero_si256 (),
3766 (__mmask8) __U);
3769 extern __inline __m128i
3770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3772 __m128i __B)
3774 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3775 (__v4si) __B,
3776 (__v4si) __W,
3777 (__mmask8) __U);
3780 extern __inline __m128i
3781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3784 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3785 (__v4si) __B,
3786 (__v4si)
3787 _mm_setzero_si128 (),
3788 (__mmask8) __U);
3791 extern __inline __m256i
3792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3794 __m128i __B)
3796 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3797 (__v2di) __B,
3798 (__v4di) __W,
3799 (__mmask8) __U);
3802 extern __inline __m256i
3803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3806 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3807 (__v2di) __B,
3808 (__v4di)
3809 _mm256_setzero_si256 (),
3810 (__mmask8) __U);
3813 extern __inline __m128i
3814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3816 __m128i __B)
3818 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3819 (__v2di) __B,
3820 (__v2di) __W,
3821 (__mmask8) __U);
3824 extern __inline __m128i
3825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3828 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3829 (__v2di) __B,
3830 (__v2di)
3831 _mm_setzero_si128 (),
3832 (__mmask8) __U);
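/* The srl block above shifts each 32- or 64-bit element right
   logically by the count held in the low 64 bits of the __m128i shift
   operand, with the usual mask/maskz lane selection.  Sketch: for
   some __m256i v,
     _mm256_maskz_srl_epi32 (0xff, v, _mm_cvtsi32_si128 (4))
   shifts every lane of v right by four bits.  */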
3835 extern __inline __m256i
3836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3838 __m256i __B)
3840 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3841 (__v8si) __B,
3842 (__v8si) __W,
3843 (__mmask8) __U);
3846 extern __inline __m256i
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3850 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3851 (__v8si) __B,
3852 (__v8si)
3853 _mm256_setzero_si256 (),
3854 (__mmask8) __U);
3857 extern __inline __m256d
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm256_scalef_pd (__m256d __A, __m256d __B)
3861 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3862 (__v4df) __B,
3863 (__v4df)
3864 _mm256_setzero_pd (),
3865 (__mmask8) -1);
3868 extern __inline __m256d
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3871 __m256d __B)
3873 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3874 (__v4df) __B,
3875 (__v4df) __W,
3876 (__mmask8) __U);
3879 extern __inline __m256d
3880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3883 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3884 (__v4df) __B,
3885 (__v4df)
3886 _mm256_setzero_pd (),
3887 (__mmask8) __U);
3890 extern __inline __m256
3891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892 _mm256_scalef_ps (__m256 __A, __m256 __B)
3894 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3895 (__v8sf) __B,
3896 (__v8sf)
3897 _mm256_setzero_ps (),
3898 (__mmask8) -1);
3901 extern __inline __m256
3902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3904 __m256 __B)
3906 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3907 (__v8sf) __B,
3908 (__v8sf) __W,
3909 (__mmask8) __U);
3912 extern __inline __m256
3913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3916 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3917 (__v8sf) __B,
3918 (__v8sf)
3919 _mm256_setzero_ps (),
3920 (__mmask8) __U);
3923 extern __inline __m128d
3924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 _mm_scalef_pd (__m128d __A, __m128d __B)
3927 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3928 (__v2df) __B,
3929 (__v2df)
3930 _mm_setzero_pd (),
3931 (__mmask8) -1);
3934 extern __inline __m128d
3935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3937 __m128d __B)
3939 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3940 (__v2df) __B,
3941 (__v2df) __W,
3942 (__mmask8) __U);
3945 extern __inline __m128d
3946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3949 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3950 (__v2df) __B,
3951 (__v2df)
3952 _mm_setzero_pd (),
3953 (__mmask8) __U);
3956 extern __inline __m128
3957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958 _mm_scalef_ps (__m128 __A, __m128 __B)
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf)
3963 _mm_setzero_ps (),
3964 (__mmask8) -1);
3967 extern __inline __m128
3968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3969 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3971 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3972 (__v4sf) __B,
3973 (__v4sf) __W,
3974 (__mmask8) __U);
3977 extern __inline __m128
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3981 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3982 (__v4sf) __B,
3983 (__v4sf)
3984 _mm_setzero_ps (),
3985 (__mmask8) __U);
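/* Editorial usage sketch, not part of the original header: vscalefpd/ps
   computes, per lane, roughly __A * 2^floor(__B), so integral exponents
   behave like ldexp.  Hypothetical example:

     __m256d a = _mm256_set1_pd (3.0);
     __m256d b = _mm256_set1_pd (2.0);
     __m256d r = _mm256_scalef_pd (a, b);              // every lane 3.0 * 4 = 12.0
     __m256d m = _mm256_maskz_scalef_pd (0x3, a, b);   // lanes 2-3 zeroed
*/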
3988 extern __inline __m256d
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3991 __m256d __C)
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3999 extern __inline __m256d
4000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4002 __mmask8 __U)
4004 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4010 extern __inline __m256d
4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4013 __m256d __C)
4015 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4016 (__v4df) __B,
4017 (__v4df) __C,
4018 (__mmask8) __U);
4021 extern __inline __m128d
4022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4023 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4031 extern __inline __m128d
4032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4034 __mmask8 __U)
4036 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4042 extern __inline __m128d
4043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4045 __m128d __C)
4047 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4048 (__v2df) __B,
4049 (__v2df) __C,
4050 (__mmask8) __U);
4053 extern __inline __m256
4054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4057 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4063 extern __inline __m256
4064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4066 __mmask8 __U)
4068 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4074 extern __inline __m256
4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4077 __m256 __C)
4079 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4080 (__v8sf) __B,
4081 (__v8sf) __C,
4082 (__mmask8) __U);
4085 extern __inline __m128
4086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4087 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4089 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4090 (__v4sf) __B,
4091 (__v4sf) __C,
4092 (__mmask8) __U);
4095 extern __inline __m128
4096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4097 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4099 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4100 (__v4sf) __B,
4101 (__v4sf) __C,
4102 (__mmask8) __U);
4105 extern __inline __m128
4106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4109 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4110 (__v4sf) __B,
4111 (__v4sf) __C,
4112 (__mmask8) __U);
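/* Editorial note, not part of the original header: the three FMA flavours
   above differ only in where masked-off lanes come from, which is why
   separate mask, mask3 and maskz builtins are used:
     _mm256_mask_fmadd_pd  (a, m, b, c)  ->  m ? a*b+c : a   (merge into __A)
     _mm256_mask3_fmadd_pd (a, b, c, m)  ->  m ? a*b+c : c   (merge into __C)
     _mm256_maskz_fmadd_pd (m, a, b, c)  ->  m ? a*b+c : 0.0
   The fmsub group below follows the same pattern with a*b-c.  Hypothetical
   example:

     __m256d a = _mm256_set1_pd (2.0), b = _mm256_set1_pd (3.0),
             c = _mm256_set1_pd (1.0);
     __m256d r = _mm256_mask_fmadd_pd (a, 0x5, b, c);
     // lanes 0 and 2 hold 7.0, lanes 1 and 3 keep 2.0 from __A
*/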
4115 extern __inline __m256d
4116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4118 __m256d __C)
4120 return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
4121 (__v4df) __B,
4122 (__v4df) __C,
4123 (__mmask8) __U);
4126 extern __inline __m256d
4127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4129 __mmask8 __U)
4131 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4132 (__v4df) __B,
4133 (__v4df) __C,
4134 (__mmask8) __U);
4137 extern __inline __m256d
4138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4140 __m256d __C)
4142 return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
4143 (__v4df) __B,
4144 (__v4df) __C,
4145 (__mmask8) __U);
4148 extern __inline __m128d
4149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4150 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4152 return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
4153 (__v2df) __B,
4154 (__v2df) __C,
4155 (__mmask8) __U);
4158 extern __inline __m128d
4159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4161 __mmask8 __U)
4163 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4164 (__v2df) __B,
4165 (__v2df) __C,
4166 (__mmask8) __U);
4169 extern __inline __m128d
4170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4172 __m128d __C)
4174 return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
4175 (__v2df) __B,
4176 (__v2df) __C,
4177 (__mmask8) __U);
4180 extern __inline __m256
4181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4182 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4184 return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
4185 (__v8sf) __B,
4186 (__v8sf) __C,
4187 (__mmask8) __U);
4190 extern __inline __m256
4191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4193 __mmask8 __U)
4195 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4196 (__v8sf) __B,
4197 (__v8sf) __C,
4198 (__mmask8) __U);
4201 extern __inline __m256
4202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4204 __m256 __C)
4206 return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
4207 (__v8sf) __B,
4208 (__v8sf) __C,
4209 (__mmask8) __U);
4212 extern __inline __m128
4213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4216 return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
4217 (__v4sf) __B,
4218 (__v4sf) __C,
4219 (__mmask8) __U);
4222 extern __inline __m128
4223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4226 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4227 (__v4sf) __B,
4228 (__v4sf) __C,
4229 (__mmask8) __U);
4232 extern __inline __m128
4233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4236 return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
4237 (__v4sf) __B,
4238 (__v4sf) __C,
4239 (__mmask8) __U);
4242 extern __inline __m256d
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4245 __m256d __C)
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8) __U);
4253 extern __inline __m256d
4254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4255 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4256 __mmask8 __U)
4258 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4259 (__v4df) __B,
4260 (__v4df) __C,
4261 (__mmask8)
4262 __U);
4265 extern __inline __m256d
4266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4268 __m256d __C)
4270 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4271 (__v4df) __B,
4272 (__v4df) __C,
4273 (__mmask8)
4274 __U);
4277 extern __inline __m128d
4278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4280 __m128d __C)
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8) __U);
4288 extern __inline __m128d
4289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4290 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4291 __mmask8 __U)
4293 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4294 (__v2df) __B,
4295 (__v2df) __C,
4296 (__mmask8)
4297 __U);
4300 extern __inline __m128d
4301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4302 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4303 __m128d __C)
4305 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4306 (__v2df) __B,
4307 (__v2df) __C,
4308 (__mmask8)
4309 __U);
4312 extern __inline __m256
4313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4315 __m256 __C)
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4323 extern __inline __m256
4324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4326 __mmask8 __U)
4328 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4334 extern __inline __m256
4335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4337 __m256 __C)
4339 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4340 (__v8sf) __B,
4341 (__v8sf) __C,
4342 (__mmask8) __U);
4345 extern __inline __m128
4346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4347 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4355 extern __inline __m128
4356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4358 __mmask8 __U)
4360 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4366 extern __inline __m128
4367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4369 __m128 __C)
4371 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4372 (__v4sf) __B,
4373 (__v4sf) __C,
4374 (__mmask8) __U);
4377 extern __inline __m256d
4378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4380 __m256d __C)
4382 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4383 (__v4df) __B,
4384 -(__v4df) __C,
4385 (__mmask8) __U);
4388 extern __inline __m256d
4389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4390 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4391 __mmask8 __U)
4393 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4394 (__v4df) __B,
4395 (__v4df) __C,
4396 (__mmask8)
4397 __U);
4400 extern __inline __m256d
4401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4402 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4403 __m256d __C)
4405 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4406 (__v4df) __B,
4407 -(__v4df) __C,
4408 (__mmask8)
4409 __U);
4412 extern __inline __m128d
4413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4415 __m128d __C)
4417 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4418 (__v2df) __B,
4419 -(__v2df) __C,
4420 (__mmask8) __U);
4423 extern __inline __m128d
4424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4426 __mmask8 __U)
4428 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4429 (__v2df) __B,
4430 (__v2df) __C,
4431 (__mmask8)
4432 __U);
4435 extern __inline __m128d
4436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4437 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4438 __m128d __C)
4440 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4441 (__v2df) __B,
4442 -(__v2df) __C,
4443 (__mmask8)
4444 __U);
4447 extern __inline __m256
4448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4450 __m256 __C)
4452 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4453 (__v8sf) __B,
4454 -(__v8sf) __C,
4455 (__mmask8) __U);
4458 extern __inline __m256
4459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4461 __mmask8 __U)
4463 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4464 (__v8sf) __B,
4465 (__v8sf) __C,
4466 (__mmask8) __U);
4469 extern __inline __m256
4470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4472 __m256 __C)
4474 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4475 (__v8sf) __B,
4476 -(__v8sf) __C,
4477 (__mmask8) __U);
4480 extern __inline __m128
4481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4484 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4485 (__v4sf) __B,
4486 -(__v4sf) __C,
4487 (__mmask8) __U);
4490 extern __inline __m128
4491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4493 __mmask8 __U)
4495 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4496 (__v4sf) __B,
4497 (__v4sf) __C,
4498 (__mmask8) __U);
4501 extern __inline __m128
4502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4504 __m128 __C)
4506 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4507 (__v4sf) __B,
4508 -(__v4sf) __C,
4509 (__mmask8) __U);
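/* Editorial note, not part of the original header: fmaddsub subtracts __C
   in the even lanes and adds it in the odd lanes; fmsubadd is the opposite
   pattern.  The merge and zero-masking fmsubadd forms above reuse the
   vfmaddsub builtins with a negated __C, while the mask3 forms need the
   dedicated vfmsubadd builtins because the unmodified __C must survive in
   masked-off lanes.  Hypothetical example:

     __m256d a = _mm256_set1_pd (2.0), b = _mm256_set1_pd (3.0),
             c = _mm256_set1_pd (1.0);
     __m256d r = _mm256_maskz_fmaddsub_pd (0xF, a, b, c);  // { 5.0, 7.0, 5.0, 7.0 }
*/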
4512 extern __inline __m256d
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4515 __m256d __C)
4517 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4523 extern __inline __m256d
4524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4526 __mmask8 __U)
4528 return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
4534 extern __inline __m256d
4535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4537 __m256d __C)
4539 return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
4540 (__v4df) __B,
4541 (__v4df) __C,
4542 (__mmask8) __U);
4545 extern __inline __m128d
4546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4548 __m128d __C)
4550 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4556 extern __inline __m128d
4557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4559 __mmask8 __U)
4561 return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
4567 extern __inline __m128d
4568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4570 __m128d __C)
4572 return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
4573 (__v2df) __B,
4574 (__v2df) __C,
4575 (__mmask8) __U);
4578 extern __inline __m256
4579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4581 __m256 __C)
4583 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4589 extern __inline __m256
4590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4592 __mmask8 __U)
4594 return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
4600 extern __inline __m256
4601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4603 __m256 __C)
4605 return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
4606 (__v8sf) __B,
4607 (__v8sf) __C,
4608 (__mmask8) __U);
4611 extern __inline __m128
4612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4613 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4615 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4616 (__v4sf) __B,
4617 (__v4sf) __C,
4618 (__mmask8) __U);
4621 extern __inline __m128
4622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4625 return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
4626 (__v4sf) __B,
4627 (__v4sf) __C,
4628 (__mmask8) __U);
4631 extern __inline __m128
4632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4635 return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
4636 (__v4sf) __B,
4637 (__v4sf) __C,
4638 (__mmask8) __U);
4641 extern __inline __m256d
4642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4644 __m256d __C)
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4652 extern __inline __m256d
4653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4655 __mmask8 __U)
4657 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4658 (__v4df) __B,
4659 (__v4df) __C,
4660 (__mmask8) __U);
4663 extern __inline __m256d
4664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4666 __m256d __C)
4668 return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
4669 (__v4df) __B,
4670 (__v4df) __C,
4671 (__mmask8) __U);
4674 extern __inline __m128d
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4677 __m128d __C)
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4685 extern __inline __m128d
4686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4688 __mmask8 __U)
4690 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4691 (__v2df) __B,
4692 (__v2df) __C,
4693 (__mmask8) __U);
4696 extern __inline __m128d
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4699 __m128d __C)
4701 return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
4702 (__v2df) __B,
4703 (__v2df) __C,
4704 (__mmask8) __U);
4707 extern __inline __m256
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4710 __m256 __C)
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4718 extern __inline __m256
4719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4721 __mmask8 __U)
4723 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4724 (__v8sf) __B,
4725 (__v8sf) __C,
4726 (__mmask8) __U);
4729 extern __inline __m256
4730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4732 __m256 __C)
4734 return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
4735 (__v8sf) __B,
4736 (__v8sf) __C,
4737 (__mmask8) __U);
4740 extern __inline __m128
4741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4744 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4745 (__v4sf) __B,
4746 (__v4sf) __C,
4747 (__mmask8) __U);
4750 extern __inline __m128
4751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4754 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4755 (__v4sf) __B,
4756 (__v4sf) __C,
4757 (__mmask8) __U);
4760 extern __inline __m128
4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4764 return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
4765 (__v4sf) __B,
4766 (__v4sf) __C,
4767 (__mmask8) __U);
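/* Editorial note, not part of the original header: fnmadd computes
   -(a*b) + c and fnmsub computes -(a*b) - c per lane, with the same
   mask/mask3/maskz lane-merging rules as the fmadd group.  Hypothetical
   example:

     __m128 a = _mm_set1_ps (2.0f), b = _mm_set1_ps (3.0f),
            c = _mm_set1_ps (10.0f);
     __m128 r = _mm_maskz_fnmadd_ps (0xF, a, b, c);   // every lane -(6) + 10 = 4.0f
*/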
4770 extern __inline __m128i
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4773 __m128i __B)
4775 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4776 (__v4si) __B,
4777 (__v4si) __W,
4778 (__mmask8) __U);
4781 extern __inline __m128i
4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4785 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4786 (__v4si) __B,
4787 (__v4si)
4788 _mm_setzero_si128 (),
4789 (__mmask8) __U);
4792 extern __inline __m256i
4793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4795 __m256i __B)
4797 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4798 (__v8si) __B,
4799 (__v8si) __W,
4800 (__mmask8) __U);
4803 extern __inline __m256i
4804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4807 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4808 (__v8si) __B,
4809 (__v8si)
4810 _mm256_setzero_si256 (),
4811 (__mmask8) __U);
4814 extern __inline __m128i
4815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4817 __m128i __B)
4819 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4820 (__v4si) __B,
4821 (__v4si) __W,
4822 (__mmask8) __U);
4825 extern __inline __m128i
4826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4829 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4830 (__v4si) __B,
4831 (__v4si)
4832 _mm_setzero_si128 (),
4833 (__mmask8) __U);
4836 extern __inline __m256i
4837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4839 __m256i __B)
4841 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4842 (__v8si) __B,
4843 (__v8si) __W,
4844 (__mmask8) __U);
4847 extern __inline __m256i
4848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4851 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4852 (__v8si) __B,
4853 (__v8si)
4854 _mm256_setzero_si256 (),
4855 (__mmask8) __U);
4858 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm256_or_epi32 (__m256i __A, __m256i __B)
4861 return (__m256i) ((__v8su)__A | (__v8su)__B);
4864 extern __inline __m128i
4865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4866 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4868 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4869 (__v4si) __B,
4870 (__v4si) __W,
4871 (__mmask8) __U);
4874 extern __inline __m128i
4875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4876 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4878 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4879 (__v4si) __B,
4880 (__v4si)
4881 _mm_setzero_si128 (),
4882 (__mmask8) __U);
4885 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm_or_epi32 (__m128i __A, __m128i __B)
4888 return (__m128i) ((__v4su)__A | (__v4su)__B);
4891 extern __inline __m256i
4892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4893 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4894 __m256i __B)
4896 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4897 (__v8si) __B,
4898 (__v8si) __W,
4899 (__mmask8) __U);
4902 extern __inline __m256i
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4906 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4907 (__v8si) __B,
4908 (__v8si)
4909 _mm256_setzero_si256 (),
4910 (__mmask8) __U);
4913 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4914 _mm256_xor_epi32 (__m256i __A, __m256i __B)
4916 return (__m256i) ((__v8su)__A ^ (__v8su)__B);
4919 extern __inline __m128i
4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4922 __m128i __B)
4924 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4925 (__v4si) __B,
4926 (__v4si) __W,
4927 (__mmask8) __U);
4930 extern __inline __m128i
4931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4934 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4935 (__v4si) __B,
4936 (__v4si)
4937 _mm_setzero_si128 (),
4938 (__mmask8) __U);
4941 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4942 _mm_xor_epi32 (__m128i __A, __m128i __B)
4944 return (__m128i) ((__v4su)__A ^ (__v4su)__B);
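/* Editorial usage sketch, not part of the original header: the unmasked
   or/xor helpers above are plain vector operators, while the masked forms
   go through the vpord/vpxord builtins; andnot computes (~__A) & __B.
   Hypothetical example:

     __m128i a = _mm_set1_epi32 (0x0F0F0F0F);
     __m128i b = _mm_set1_epi32 (0x00FF00FF);
     __m128i x = _mm_xor_epi32 (a, b);                // 0x0FF00FF0 per lane
     __m128i m = _mm_maskz_and_epi32 (0x3, a, b);     // 0x000F000F in lanes 0-1, 0 elsewhere
*/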
4947 extern __inline __m128
4948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4951 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4952 (__v4sf) __W,
4953 (__mmask8) __U);
4956 extern __inline __m128
4957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4960 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4961 (__v4sf)
4962 _mm_setzero_ps (),
4963 (__mmask8) __U);
4966 extern __inline __m128
4967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4968 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4970 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4971 (__v4sf) __W,
4972 (__mmask8) __U);
4975 extern __inline __m128
4976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4979 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4980 (__v4sf)
4981 _mm_setzero_ps (),
4982 (__mmask8) __U);
4985 extern __inline __m256i
4986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4987 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4989 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4990 (__v8si) __W,
4991 (__mmask8) __U);
4994 extern __inline __m256i
4995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4998 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4999 (__v8si)
5000 _mm256_setzero_si256 (),
5001 (__mmask8) __U);
5004 extern __inline __m128i
5005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5008 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5009 (__v4si) __W,
5010 (__mmask8) __U);
5013 extern __inline __m128i
5014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5017 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5018 (__v4si)
5019 _mm_setzero_si128 (),
5020 (__mmask8) __U);
5023 extern __inline __m256i
5024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025 _mm256_cvtps_epu32 (__m256 __A)
5027 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5028 (__v8si)
5029 _mm256_setzero_si256 (),
5030 (__mmask8) -1);
5033 extern __inline __m256i
5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5037 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5038 (__v8si) __W,
5039 (__mmask8) __U);
5042 extern __inline __m256i
5043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5046 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5047 (__v8si)
5048 _mm256_setzero_si256 (),
5049 (__mmask8) __U);
5052 extern __inline __m128i
5053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054 _mm_cvtps_epu32 (__m128 __A)
5056 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5057 (__v4si)
5058 _mm_setzero_si128 (),
5059 (__mmask8) -1);
5062 extern __inline __m128i
5063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5066 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5067 (__v4si) __W,
5068 (__mmask8) __U);
5071 extern __inline __m128i
5072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5075 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5076 (__v4si)
5077 _mm_setzero_si128 (),
5078 (__mmask8) __U);
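/* Editorial usage sketch, not part of the original header: cvtpd_ps
   narrows doubles to floats (the unused upper floats of the xmm result are
   zeroed), cvtps_epi32 converts with the current rounding mode, and the
   cvtps_epu32 forms are the AVX-512-only unsigned conversions.
   Hypothetical example:

     __m256  v  = _mm256_set1_ps (3.7f);
     __m256i si = _mm256_maskz_cvtps_epi32 (0xFF, v);  // 4 in every lane (round-to-nearest)
     __m256i ui = _mm256_cvtps_epu32 (v);              // unsigned conversion, also 4
*/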
5081 extern __inline __m256d
5082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5085 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5086 (__v4df) __W,
5087 (__mmask8) __U);
5090 extern __inline __m256d
5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5094 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5095 (__v4df)
5096 _mm256_setzero_pd (),
5097 (__mmask8) __U);
5100 extern __inline __m128d
5101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5104 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5105 (__v2df) __W,
5106 (__mmask8) __U);
5109 extern __inline __m128d
5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5113 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5114 (__v2df)
5115 _mm_setzero_pd (),
5116 (__mmask8) __U);
5119 extern __inline __m256
5120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5123 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5124 (__v8sf) __W,
5125 (__mmask8) __U);
5128 extern __inline __m256
5129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5132 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5133 (__v8sf)
5134 _mm256_setzero_ps (),
5135 (__mmask8) __U);
5138 extern __inline __m128
5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5142 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5143 (__v4sf) __W,
5144 (__mmask8) __U);
5147 extern __inline __m128
5148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5151 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5152 (__v4sf)
5153 _mm_setzero_ps (),
5154 (__mmask8) __U);
5157 extern __inline __m256
5158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5159 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5161 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5162 (__v8sf) __W,
5163 (__mmask8) __U);
5166 extern __inline __m256
5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5170 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5171 (__v8sf)
5172 _mm256_setzero_ps (),
5173 (__mmask8) __U);
5176 extern __inline __m128
5177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5178 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5180 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5181 (__v4sf) __W,
5182 (__mmask8) __U);
5185 extern __inline __m128
5186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5189 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5190 (__v4sf)
5191 _mm_setzero_ps (),
5192 (__mmask8) __U);
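/* Editorial usage sketch, not part of the original header: movedup
   duplicates the even (low) double of each pair, while movehdup/moveldup
   duplicate the odd/even single-precision elements respectively, all with
   the usual merge/zero masking.  Hypothetical example:

     __m128d v = _mm_set_pd (2.0, 1.0);            // lanes { 1.0, 2.0 }
     __m128d d = _mm_maskz_movedup_pd (0x3, v);    // { 1.0, 1.0 }
*/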
5195 extern __inline __m128i
5196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5198 __m128i __B)
5200 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5201 (__v4si) __B,
5202 (__v4si) __W,
5203 (__mmask8) __U);
5206 extern __inline __m128i
5207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5210 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5211 (__v4si) __B,
5212 (__v4si)
5213 _mm_setzero_si128 (),
5214 (__mmask8) __U);
5217 extern __inline __m256i
5218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5219 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5220 __m256i __B)
5222 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5223 (__v8si) __B,
5224 (__v8si) __W,
5225 (__mmask8) __U);
5228 extern __inline __m256i
5229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5230 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5232 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5233 (__v8si) __B,
5234 (__v8si)
5235 _mm256_setzero_si256 (),
5236 (__mmask8) __U);
5239 extern __inline __m128i
5240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5241 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5242 __m128i __B)
5244 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5245 (__v2di) __B,
5246 (__v2di) __W,
5247 (__mmask8) __U);
5250 extern __inline __m128i
5251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5254 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5255 (__v2di) __B,
5256 (__v2di)
5257 _mm_setzero_si128 (),
5258 (__mmask8) __U);
5261 extern __inline __m256i
5262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5264 __m256i __B)
5266 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5267 (__v4di) __B,
5268 (__v4di) __W,
5269 (__mmask8) __U);
5272 extern __inline __m256i
5273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5276 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5277 (__v4di) __B,
5278 (__v4di)
5279 _mm256_setzero_si256 (),
5280 (__mmask8) __U);
5283 extern __inline __m128i
5284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5286 __m128i __B)
5288 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5289 (__v4si) __B,
5290 (__v4si) __W,
5291 (__mmask8) __U);
5294 extern __inline __m128i
5295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5298 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5299 (__v4si) __B,
5300 (__v4si)
5301 _mm_setzero_si128 (),
5302 (__mmask8) __U);
5305 extern __inline __m256i
5306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5307 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5308 __m256i __B)
5310 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5311 (__v8si) __B,
5312 (__v8si) __W,
5313 (__mmask8) __U);
5316 extern __inline __m256i
5317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5318 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5320 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5321 (__v8si) __B,
5322 (__v8si)
5323 _mm256_setzero_si256 (),
5324 (__mmask8) __U);
5327 extern __inline __m128i
5328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5329 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5330 __m128i __B)
5332 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5333 (__v2di) __B,
5334 (__v2di) __W,
5335 (__mmask8) __U);
5338 extern __inline __m128i
5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5342 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5343 (__v2di) __B,
5344 (__v2di)
5345 _mm_setzero_si128 (),
5346 (__mmask8) __U);
5349 extern __inline __m256i
5350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5351 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5352 __m256i __B)
5354 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5355 (__v4di) __B,
5356 (__v4di) __W,
5357 (__mmask8) __U);
5360 extern __inline __m256i
5361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5364 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5365 (__v4di) __B,
5366 (__v4di)
5367 _mm256_setzero_si256 (),
5368 (__mmask8) __U);
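/* Editorial usage sketch, not part of the original header: the masked
   unpack forms interleave __A and __B exactly like their SSE/AVX2
   counterparts and then apply the write mask.  Hypothetical example:

     __m128i a  = _mm_set_epi32 (3, 2, 1, 0);           // lanes { 0, 1, 2, 3 }
     __m128i b  = _mm_set_epi32 (7, 6, 5, 4);           // lanes { 4, 5, 6, 7 }
     __m128i lo = _mm_maskz_unpacklo_epi32 (0xF, a, b); // { 0, 4, 1, 5 }
     __m128i hi = _mm_maskz_unpackhi_epi32 (0xF, a, b); // { 2, 6, 3, 7 }
*/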
5371 extern __inline __mmask8
5372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5375 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5376 (__v4si) __B, 0,
5377 (__mmask8) -1);
5380 extern __inline __mmask8
5381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5384 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5385 (__v4si) __B,
5386 (__mmask8) -1);
5389 extern __inline __mmask8
5390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5391 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5393 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5394 (__v4si) __B, 0, __U);
5397 extern __inline __mmask8
5398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5399 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5401 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5402 (__v4si) __B, __U);
5405 extern __inline __mmask8
5406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5407 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5409 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5410 (__v8si) __B, 0,
5411 (__mmask8) -1);
5414 extern __inline __mmask8
5415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5418 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5419 (__v8si) __B,
5420 (__mmask8) -1);
5423 extern __inline __mmask8
5424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5427 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5428 (__v8si) __B, 0, __U);
5431 extern __inline __mmask8
5432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5435 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5436 (__v8si) __B, __U);
5439 extern __inline __mmask8
5440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5441 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5443 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5444 (__v2di) __B, 0,
5445 (__mmask8) -1);
5448 extern __inline __mmask8
5449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5450 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5452 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5453 (__v2di) __B,
5454 (__mmask8) -1);
5457 extern __inline __mmask8
5458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5459 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5461 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5462 (__v2di) __B, 0, __U);
5465 extern __inline __mmask8
5466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5467 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5469 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5470 (__v2di) __B, __U);
5473 extern __inline __mmask8
5474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5475 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5477 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5478 (__v4di) __B, 0,
5479 (__mmask8) -1);
5482 extern __inline __mmask8
5483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5484 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5486 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5487 (__v4di) __B,
5488 (__mmask8) -1);
5491 extern __inline __mmask8
5492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5493 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5495 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5496 (__v4di) __B, 0, __U);
5499 extern __inline __mmask8
5500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5501 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5503 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5504 (__v4di) __B, __U);
5507 extern __inline __mmask8
5508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5511 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5512 (__v4si) __B, 6,
5513 (__mmask8) -1);
5516 extern __inline __mmask8
5517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5518 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5520 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5521 (__v4si) __B,
5522 (__mmask8) -1);
5525 extern __inline __mmask8
5526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5527 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5529 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5530 (__v4si) __B, 6, __U);
5533 extern __inline __mmask8
5534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5535 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5537 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5538 (__v4si) __B, __U);
5541 extern __inline __mmask8
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5545 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5546 (__v8si) __B, 6,
5547 (__mmask8) -1);
5550 extern __inline __mmask8
5551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5552 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5554 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5555 (__v8si) __B,
5556 (__mmask8) -1);
5559 extern __inline __mmask8
5560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5563 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5564 (__v8si) __B, 6, __U);
5567 extern __inline __mmask8
5568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5571 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5572 (__v8si) __B, __U);
5575 extern __inline __mmask8
5576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5579 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5580 (__v2di) __B, 6,
5581 (__mmask8) -1);
5584 extern __inline __mmask8
5585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5588 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5589 (__v2di) __B,
5590 (__mmask8) -1);
5593 extern __inline __mmask8
5594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5595 _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5597 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5598 (__v2di) __B, 6, __U);
5601 extern __inline __mmask8
5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5605 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5606 (__v2di) __B, __U);
5609 extern __inline __mmask8
5610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5611 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5613 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5614 (__v4di) __B, 6,
5615 (__mmask8) -1);
5618 extern __inline __mmask8
5619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5620 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5622 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5623 (__v4di) __B,
5624 (__mmask8) -1);
5627 extern __inline __mmask8
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5631 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5632 (__v4di) __B, 6, __U);
5635 extern __inline __mmask8
5636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5637 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5639 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5640 (__v4di) __B, __U);
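/* Editorial note, not part of the original header: the unsigned compares
   above are built on the generic vpcmpu builtins with an immediate
   predicate (0 = EQ, 6 = GT/NLE), while the signed forms map to the
   dedicated pcmpeq/pcmpgt builtins; all of them return a lane bitmap in a
   __mmask8.  Hypothetical example:

     __m128i a  = _mm_set_epi32 (4, 3, 2, 1);              // lanes { 1, 2, 3, 4 }
     __m128i b  = _mm_set1_epi32 (2);
     __mmask8 gt = _mm_cmpgt_epi32_mask (a, b);            // lanes 2-3 greater -> 0x0C
     __mmask8 eq = _mm_mask_cmpeq_epi32_mask (0xF, a, b);  // lane 1 equal -> 0x02
*/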
5643 extern __inline __mmask8
5644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5645 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5647 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5648 (__v4si) __B,
5649 (__mmask8) -1);
5652 extern __inline __mmask8
5653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5654 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5656 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5657 (__v4si) __B, __U);
5660 extern __inline __mmask8
5661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5662 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5664 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5665 (__v8si) __B,
5666 (__mmask8) -1);
5669 extern __inline __mmask8
5670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5673 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5674 (__v8si) __B, __U);
5677 extern __inline __mmask8
5678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5679 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5681 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5682 (__v2di) __B,
5683 (__mmask8) -1);
5686 extern __inline __mmask8
5687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5688 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5690 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5691 (__v2di) __B, __U);
5694 extern __inline __mmask8
5695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5698 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5699 (__v4di) __B,
5700 (__mmask8) -1);
5703 extern __inline __mmask8
5704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5707 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5708 (__v4di) __B, __U);
5711 extern __inline __mmask8
5712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5715 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5716 (__v4si) __B,
5717 (__mmask8) -1);
5720 extern __inline __mmask8
5721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5724 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5725 (__v4si) __B, __U);
5728 extern __inline __mmask8
5729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5730 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5732 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5733 (__v8si) __B,
5734 (__mmask8) -1);
5737 extern __inline __mmask8
5738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5741 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5742 (__v8si) __B, __U);
5745 extern __inline __mmask8
5746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5749 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5750 (__v2di) __B,
5751 (__mmask8) -1);
5754 extern __inline __mmask8
5755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5756 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5758 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5759 (__v2di) __B, __U);
5762 extern __inline __mmask8
5763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5764 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5766 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5767 (__v4di) __B,
5768 (__mmask8) -1);
5771 extern __inline __mmask8
5772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5775 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5776 (__v4di) __B, __U);
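/* Compress: pack the elements of __A selected by the mask into the
   low-order positions of the result (inactive positions come from __W
   or are zeroed); the compressstoreu forms store only the selected
   elements contiguously to unaligned memory.  */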
5779 extern __inline __m256d
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5783 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5784 (__v4df) __W,
5785 (__mmask8) __U);
5788 extern __inline __m256d
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5792 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5793 (__v4df)
5794 _mm256_setzero_pd (),
5795 (__mmask8) __U);
5798 extern __inline void
5799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5802 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5803 (__v4df) __A,
5804 (__mmask8) __U);
5807 extern __inline __m128d
5808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5811 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5812 (__v2df) __W,
5813 (__mmask8) __U);
5816 extern __inline __m128d
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5820 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5821 (__v2df)
5822 _mm_setzero_pd (),
5823 (__mmask8) __U);
5826 extern __inline void
5827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5830 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5831 (__v2df) __A,
5832 (__mmask8) __U);
5835 extern __inline __m256
5836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5839 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5840 (__v8sf) __W,
5841 (__mmask8) __U);
5844 extern __inline __m256
5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5848 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5849 (__v8sf)
5850 _mm256_setzero_ps (),
5851 (__mmask8) __U);
5854 extern __inline void
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5858 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5859 (__v8sf) __A,
5860 (__mmask8) __U);
5863 extern __inline __m128
5864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5867 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5868 (__v4sf) __W,
5869 (__mmask8) __U);
5872 extern __inline __m128
5873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5876 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5877 (__v4sf)
5878 _mm_setzero_ps (),
5879 (__mmask8) __U);
5882 extern __inline void
5883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5886 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5887 (__v4sf) __A,
5888 (__mmask8) __U);
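/* Illustrative use (an example, not part of this header): with
   float dst[8], __m256 v and __mmask8 m = 0x2d,

     __m256 packed = _mm256_maskz_compress_ps (m, v);
     _mm256_mask_compressstoreu_ps (dst, m, v);

   PACKED holds the four selected lanes of V in its low elements (the
   rest are zeroed) and the store writes only those lanes contiguously
   starting at DST.  */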
5891 extern __inline __m256i
5892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5895 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5896 (__v4di) __W,
5897 (__mmask8) __U);
5900 extern __inline __m256i
5901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5904 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5905 (__v4di)
5906 _mm256_setzero_si256 (),
5907 (__mmask8) __U);
5910 extern __inline void
5911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5914 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5915 (__v4di) __A,
5916 (__mmask8) __U);
5919 extern __inline __m128i
5920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5923 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5924 (__v2di) __W,
5925 (__mmask8) __U);
5928 extern __inline __m128i
5929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5932 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5933 (__v2di)
5934 _mm_setzero_si128 (),
5935 (__mmask8) __U);
5938 extern __inline void
5939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5942 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5943 (__v2di) __A,
5944 (__mmask8) __U);
5947 extern __inline __m256i
5948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5951 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5952 (__v8si) __W,
5953 (__mmask8) __U);
5956 extern __inline __m256i
5957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5960 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5961 (__v8si)
5962 _mm256_setzero_si256 (),
5963 (__mmask8) __U);
5966 extern __inline void
5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5970 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5971 (__v8si) __A,
5972 (__mmask8) __U);
5975 extern __inline __m128i
5976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5979 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5980 (__v4si) __W,
5981 (__mmask8) __U);
5984 extern __inline __m128i
5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5988 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5989 (__v4si)
5990 _mm_setzero_si128 (),
5991 (__mmask8) __U);
5994 extern __inline void
5995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5998 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5999 (__v4si) __A,
6000 (__mmask8) __U);
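/* Expand: the inverse of compress.  Consecutive source elements (from
   a register, or from memory for the expandloadu forms) are placed in
   the destination positions selected by the mask; unselected positions
   come from __W or are zeroed.  */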
6003 extern __inline __m256d
6004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6007 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6008 (__v4df) __W,
6009 (__mmask8) __U);
6012 extern __inline __m256d
6013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6016 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6017 (__v4df)
6018 _mm256_setzero_pd (),
6019 (__mmask8) __U);
6022 extern __inline __m256d
6023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6026 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6027 (__v4df) __W,
6028 (__mmask8)
6029 __U);
6032 extern __inline __m256d
6033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6034 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6036 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6037 (__v4df)
6038 _mm256_setzero_pd (),
6039 (__mmask8)
6040 __U);
6043 extern __inline __m128d
6044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6045 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6047 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6048 (__v2df) __W,
6049 (__mmask8) __U);
6052 extern __inline __m128d
6053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6054 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6056 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6057 (__v2df)
6058 _mm_setzero_pd (),
6059 (__mmask8) __U);
6062 extern __inline __m128d
6063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6064 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6066 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6067 (__v2df) __W,
6068 (__mmask8)
6069 __U);
6072 extern __inline __m128d
6073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6076 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6077 (__v2df)
6078 _mm_setzero_pd (),
6079 (__mmask8)
6080 __U);
6083 extern __inline __m256
6084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6087 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6088 (__v8sf) __W,
6089 (__mmask8) __U);
6092 extern __inline __m256
6093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6094 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6096 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6097 (__v8sf)
6098 _mm256_setzero_ps (),
6099 (__mmask8) __U);
6102 extern __inline __m256
6103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6104 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6106 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6107 (__v8sf) __W,
6108 (__mmask8) __U);
6111 extern __inline __m256
6112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6115 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6116 (__v8sf)
6117 _mm256_setzero_ps (),
6118 (__mmask8)
6119 __U);
6122 extern __inline __m128
6123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6124 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6126 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6127 (__v4sf) __W,
6128 (__mmask8) __U);
6131 extern __inline __m128
6132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6133 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6135 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6136 (__v4sf)
6137 _mm_setzero_ps (),
6138 (__mmask8) __U);
6141 extern __inline __m128
6142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6143 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6145 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6146 (__v4sf) __W,
6147 (__mmask8) __U);
6150 extern __inline __m128
6151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6154 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6155 (__v4sf)
6156 _mm_setzero_ps (),
6157 (__mmask8)
6158 __U);
6161 extern __inline __m256i
6162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6163 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6165 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6166 (__v4di) __W,
6167 (__mmask8) __U);
6170 extern __inline __m256i
6171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6174 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6175 (__v4di)
6176 _mm256_setzero_si256 (),
6177 (__mmask8) __U);
6180 extern __inline __m256i
6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6183 void const *__P)
6185 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6186 (__v4di) __W,
6187 (__mmask8)
6188 __U);
6191 extern __inline __m256i
6192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6193 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6195 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6196 (__v4di)
6197 _mm256_setzero_si256 (),
6198 (__mmask8)
6199 __U);
6202 extern __inline __m128i
6203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6204 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6206 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6207 (__v2di) __W,
6208 (__mmask8) __U);
6211 extern __inline __m128i
6212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6215 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6216 (__v2di)
6217 _mm_setzero_si128 (),
6218 (__mmask8) __U);
6221 extern __inline __m128i
6222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6223 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6225 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6226 (__v2di) __W,
6227 (__mmask8)
6228 __U);
6231 extern __inline __m128i
6232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6233 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6235 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6236 (__v2di)
6237 _mm_setzero_si128 (),
6238 (__mmask8)
6239 __U);
6242 extern __inline __m256i
6243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6246 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6247 (__v8si) __W,
6248 (__mmask8) __U);
6251 extern __inline __m256i
6252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6255 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6256 (__v8si)
6257 _mm256_setzero_si256 (),
6258 (__mmask8) __U);
6261 extern __inline __m256i
6262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6264 void const *__P)
6266 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6267 (__v8si) __W,
6268 (__mmask8)
6269 __U);
6272 extern __inline __m256i
6273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6274 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6276 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6277 (__v8si)
6278 _mm256_setzero_si256 (),
6279 (__mmask8)
6280 __U);
6283 extern __inline __m128i
6284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6285 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6287 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6288 (__v4si) __W,
6289 (__mmask8) __U);
6292 extern __inline __m128i
6293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6296 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6297 (__v4si)
6298 _mm_setzero_si128 (),
6299 (__mmask8) __U);
6302 extern __inline __m128i
6303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6306 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6307 (__v4si) __W,
6308 (__mmask8)
6309 __U);
6312 extern __inline __m128i
6313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6316 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6317 (__v4si)
6318 _mm_setzero_si128 (),
6319 (__mmask8)
6320 __U);
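/* Two-source permutes (the VPERMT2 / VPERMI2 forms): each index in __I
   selects one element from the concatenation of the two data operands,
   with indices below the element count picking from the first operand
   and the rest from the second.  The _mask_ variants keep elements of
   __A where the mask bit is clear; the _mask2_ variants keep the
   corresponding index elements of __I instead.  */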
6323 extern __inline __m256d
6324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6327 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6328 /* idx */ ,
6329 (__v4df) __A,
6330 (__v4df) __B,
6331 (__mmask8) -1);
6334 extern __inline __m256d
6335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6337 __m256d __B)
6339 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6340 /* idx */ ,
6341 (__v4df) __A,
6342 (__v4df) __B,
6343 (__mmask8)
6344 __U);
6347 extern __inline __m256d
6348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6350 __m256d __B)
6352 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6353 (__v4di) __I
6354 /* idx */ ,
6355 (__v4df) __B,
6356 (__mmask8)
6357 __U);
6360 extern __inline __m256d
6361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6363 __m256d __B)
6365 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6366 /* idx */ ,
6367 (__v4df) __A,
6368 (__v4df) __B,
6369 (__mmask8)
6370 __U);
6373 extern __inline __m256
6374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6377 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6378 /* idx */ ,
6379 (__v8sf) __A,
6380 (__v8sf) __B,
6381 (__mmask8) -1);
6384 extern __inline __m256
6385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6387 __m256 __B)
6389 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6390 /* idx */ ,
6391 (__v8sf) __A,
6392 (__v8sf) __B,
6393 (__mmask8) __U);
6396 extern __inline __m256
6397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6398 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6399 __m256 __B)
6401 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6402 (__v8si) __I
6403 /* idx */ ,
6404 (__v8sf) __B,
6405 (__mmask8) __U);
6408 extern __inline __m256
6409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6411 __m256 __B)
6413 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6414 /* idx */ ,
6415 (__v8sf) __A,
6416 (__v8sf) __B,
6417 (__mmask8)
6418 __U);
6421 extern __inline __m128i
6422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6425 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6426 /* idx */ ,
6427 (__v2di) __A,
6428 (__v2di) __B,
6429 (__mmask8) -1);
6432 extern __inline __m128i
6433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6435 __m128i __B)
6437 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6438 /* idx */ ,
6439 (__v2di) __A,
6440 (__v2di) __B,
6441 (__mmask8) __U);
6444 extern __inline __m128i
6445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6447 __m128i __B)
6449 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6450 (__v2di) __I
6451 /* idx */ ,
6452 (__v2di) __B,
6453 (__mmask8) __U);
6456 extern __inline __m128i
6457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6458 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6459 __m128i __B)
6461 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6462 /* idx */ ,
6463 (__v2di) __A,
6464 (__v2di) __B,
6465 (__mmask8)
6466 __U);
6469 extern __inline __m128i
6470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6471 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6473 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6474 /* idx */ ,
6475 (__v4si) __A,
6476 (__v4si) __B,
6477 (__mmask8) -1);
6480 extern __inline __m128i
6481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6483 __m128i __B)
6485 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6486 /* idx */ ,
6487 (__v4si) __A,
6488 (__v4si) __B,
6489 (__mmask8) __U);
6492 extern __inline __m128i
6493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6494 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6495 __m128i __B)
6497 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6498 (__v4si) __I
6499 /* idx */ ,
6500 (__v4si) __B,
6501 (__mmask8) __U);
6504 extern __inline __m128i
6505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6506 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6507 __m128i __B)
6509 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6510 /* idx */ ,
6511 (__v4si) __A,
6512 (__v4si) __B,
6513 (__mmask8)
6514 __U);
6517 extern __inline __m256i
6518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6519 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6521 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6522 /* idx */ ,
6523 (__v4di) __A,
6524 (__v4di) __B,
6525 (__mmask8) -1);
6528 extern __inline __m256i
6529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6531 __m256i __B)
6533 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6534 /* idx */ ,
6535 (__v4di) __A,
6536 (__v4di) __B,
6537 (__mmask8) __U);
6540 extern __inline __m256i
6541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6543 __mmask8 __U, __m256i __B)
6545 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6546 (__v4di) __I
6547 /* idx */ ,
6548 (__v4di) __B,
6549 (__mmask8) __U);
6552 extern __inline __m256i
6553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6554 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6555 __m256i __I, __m256i __B)
6557 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6558 /* idx */ ,
6559 (__v4di) __A,
6560 (__v4di) __B,
6561 (__mmask8)
6562 __U);
6565 extern __inline __m256i
6566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6569 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6570 /* idx */ ,
6571 (__v8si) __A,
6572 (__v8si) __B,
6573 (__mmask8) -1);
6576 extern __inline __m256i
6577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6579 __m256i __B)
6581 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6582 /* idx */ ,
6583 (__v8si) __A,
6584 (__v8si) __B,
6585 (__mmask8) __U);
6588 extern __inline __m256i
6589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6590 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6591 __mmask8 __U, __m256i __B)
6593 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6594 (__v8si) __I
6595 /* idx */ ,
6596 (__v8si) __B,
6597 (__mmask8) __U);
6600 extern __inline __m256i
6601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6602 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6603 __m256i __I, __m256i __B)
6605 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6606 /* idx */ ,
6607 (__v8si) __A,
6608 (__v8si) __B,
6609 (__mmask8)
6610 __U);
6613 extern __inline __m128d
6614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6617 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6618 /* idx */ ,
6619 (__v2df) __A,
6620 (__v2df) __B,
6621 (__mmask8) -1);
6624 extern __inline __m128d
6625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6626 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6627 __m128d __B)
6629 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6630 /* idx */ ,
6631 (__v2df) __A,
6632 (__v2df) __B,
6633 (__mmask8)
6634 __U);
6637 extern __inline __m128d
6638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6640 __m128d __B)
6642 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6643 (__v2di) __I
6644 /* idx */ ,
6645 (__v2df) __B,
6646 (__mmask8)
6647 __U);
6650 extern __inline __m128d
6651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6653 __m128d __B)
6655 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6656 /* idx */ ,
6657 (__v2df) __A,
6658 (__v2df) __B,
6659 (__mmask8)
6660 __U);
6663 extern __inline __m128
6664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6665 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6667 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6668 /* idx */ ,
6669 (__v4sf) __A,
6670 (__v4sf) __B,
6671 (__mmask8) -1);
6674 extern __inline __m128
6675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6676 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6677 __m128 __B)
6679 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6680 /* idx */ ,
6681 (__v4sf) __A,
6682 (__v4sf) __B,
6683 (__mmask8) __U);
6686 extern __inline __m128
6687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6688 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6689 __m128 __B)
6691 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6692 (__v4si) __I
6693 /* idx */ ,
6694 (__v4sf) __B,
6695 (__mmask8) __U);
6698 extern __inline __m128
6699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6700 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6701 __m128 __B)
6703 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6704 /* idx */ ,
6705 (__v4sf) __A,
6706 (__v4sf) __B,
6707 (__mmask8)
6708 __U);
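/* Illustrative use (an example, not part of this header): with
   __m256d a and b,

     __m256d r
       = _mm256_permutex2var_pd (a, _mm256_set_epi64x (5, 1, 4, 0), b);

   yields r = { a[0], b[0], a[1], b[1] }: indices 0-3 select from A,
   indices 4-7 select from B.  */

/* Per-element variable shifts: sllv (logical left), srav (arithmetic
   right) and srlv (logical right), in merge-masking and zero-masking
   forms.  */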
6711 extern __inline __m128i
6712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6715 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6716 (__v2di) __Y,
6717 (__v2di)
6718 _mm_setzero_si128 (),
6719 (__mmask8) -1);
6722 extern __inline __m128i
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6725 __m128i __Y)
6727 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6728 (__v2di) __Y,
6729 (__v2di) __W,
6730 (__mmask8) __U);
6733 extern __inline __m128i
6734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6737 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6738 (__v2di) __Y,
6739 (__v2di)
6740 _mm_setzero_si128 (),
6741 (__mmask8) __U);
6744 extern __inline __m256i
6745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6747 __m256i __Y)
6749 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6750 (__v8si) __Y,
6751 (__v8si) __W,
6752 (__mmask8) __U);
6755 extern __inline __m256i
6756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6759 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6760 (__v8si) __Y,
6761 (__v8si)
6762 _mm256_setzero_si256 (),
6763 (__mmask8) __U);
6766 extern __inline __m128i
6767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6769 __m128i __Y)
6771 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6772 (__v4si) __Y,
6773 (__v4si) __W,
6774 (__mmask8) __U);
6777 extern __inline __m128i
6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6781 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6782 (__v4si) __Y,
6783 (__v4si)
6784 _mm_setzero_si128 (),
6785 (__mmask8) __U);
6788 extern __inline __m256i
6789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6791 __m256i __Y)
6793 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6794 (__v4di) __Y,
6795 (__v4di) __W,
6796 (__mmask8) __U);
6799 extern __inline __m256i
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6803 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6804 (__v4di) __Y,
6805 (__v4di)
6806 _mm256_setzero_si256 (),
6807 (__mmask8) __U);
6810 extern __inline __m128i
6811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6813 __m128i __Y)
6815 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6816 (__v2di) __Y,
6817 (__v2di) __W,
6818 (__mmask8) __U);
6821 extern __inline __m128i
6822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6825 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6826 (__v2di) __Y,
6827 (__v2di)
6828 _mm_setzero_si128 (),
6829 (__mmask8) __U);
6832 extern __inline __m256i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6835 __m256i __Y)
6837 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6838 (__v8si) __Y,
6839 (__v8si) __W,
6840 (__mmask8) __U);
6843 extern __inline __m256i
6844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6847 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6848 (__v8si) __Y,
6849 (__v8si)
6850 _mm256_setzero_si256 (),
6851 (__mmask8) __U);
6854 extern __inline __m128i
6855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6857 __m128i __Y)
6859 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6860 (__v4si) __Y,
6861 (__v4si) __W,
6862 (__mmask8) __U);
6865 extern __inline __m128i
6866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6869 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6870 (__v4si) __Y,
6871 (__v4si)
6872 _mm_setzero_si128 (),
6873 (__mmask8) __U);
6876 extern __inline __m256i
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6879 __m256i __Y)
6881 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6882 (__v8si) __Y,
6883 (__v8si) __W,
6884 (__mmask8) __U);
6887 extern __inline __m256i
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6891 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6892 (__v8si) __Y,
6893 (__v8si)
6894 _mm256_setzero_si256 (),
6895 (__mmask8) __U);
6898 extern __inline __m128i
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6901 __m128i __Y)
6903 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6904 (__v4si) __Y,
6905 (__v4si) __W,
6906 (__mmask8) __U);
6909 extern __inline __m128i
6910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6913 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6914 (__v4si) __Y,
6915 (__v4si)
6916 _mm_setzero_si128 (),
6917 (__mmask8) __U);
6920 extern __inline __m256i
6921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6923 __m256i __Y)
6925 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6926 (__v4di) __Y,
6927 (__v4di) __W,
6928 (__mmask8) __U);
6931 extern __inline __m256i
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6935 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6936 (__v4di) __Y,
6937 (__v4di)
6938 _mm256_setzero_si256 (),
6939 (__mmask8) __U);
6942 extern __inline __m128i
6943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6945 __m128i __Y)
6947 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6948 (__v2di) __Y,
6949 (__v2di) __W,
6950 (__mmask8) __U);
6953 extern __inline __m128i
6954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6957 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6958 (__v2di) __Y,
6959 (__v2di)
6960 _mm_setzero_si128 (),
6961 (__mmask8) __U);
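/* Per-element variable rotates: rolv rotates each element left and
   rorv rotates it right by the count held in the corresponding element
   of __B.  */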
6964 extern __inline __m256i
6965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
6968 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6969 (__v8si) __B,
6970 (__v8si)
6971 _mm256_setzero_si256 (),
6972 (__mmask8) -1);
6975 extern __inline __m256i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6978 __m256i __B)
6980 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6981 (__v8si) __B,
6982 (__v8si) __W,
6983 (__mmask8) __U);
6986 extern __inline __m256i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6990 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6991 (__v8si) __B,
6992 (__v8si)
6993 _mm256_setzero_si256 (),
6994 (__mmask8) __U);
6997 extern __inline __m128i
6998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999 _mm_rolv_epi32 (__m128i __A, __m128i __B)
7001 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7002 (__v4si) __B,
7003 (__v4si)
7004 _mm_setzero_si128 (),
7005 (__mmask8) -1);
7008 extern __inline __m128i
7009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7011 __m128i __B)
7013 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7014 (__v4si) __B,
7015 (__v4si) __W,
7016 (__mmask8) __U);
7019 extern __inline __m128i
7020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7023 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7024 (__v4si) __B,
7025 (__v4si)
7026 _mm_setzero_si128 (),
7027 (__mmask8) __U);
7030 extern __inline __m256i
7031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
7034 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7035 (__v8si) __B,
7036 (__v8si)
7037 _mm256_setzero_si256 (),
7038 (__mmask8) -1);
7041 extern __inline __m256i
7042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7044 __m256i __B)
7046 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7047 (__v8si) __B,
7048 (__v8si) __W,
7049 (__mmask8) __U);
7052 extern __inline __m256i
7053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7056 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7057 (__v8si) __B,
7058 (__v8si)
7059 _mm256_setzero_si256 (),
7060 (__mmask8) __U);
7063 extern __inline __m128i
7064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065 _mm_rorv_epi32 (__m128i __A, __m128i __B)
7067 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7068 (__v4si) __B,
7069 (__v4si)
7070 _mm_setzero_si128 (),
7071 (__mmask8) -1);
7074 extern __inline __m128i
7075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7077 __m128i __B)
7079 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7080 (__v4si) __B,
7081 (__v4si) __W,
7082 (__mmask8) __U);
7085 extern __inline __m128i
7086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7089 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7090 (__v4si) __B,
7091 (__v4si)
7092 _mm_setzero_si128 (),
7093 (__mmask8) __U);
7096 extern __inline __m256i
7097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
7100 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7101 (__v4di) __B,
7102 (__v4di)
7103 _mm256_setzero_si256 (),
7104 (__mmask8) -1);
7107 extern __inline __m256i
7108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7110 __m256i __B)
7112 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7113 (__v4di) __B,
7114 (__v4di) __W,
7115 (__mmask8) __U);
7118 extern __inline __m256i
7119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7122 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7123 (__v4di) __B,
7124 (__v4di)
7125 _mm256_setzero_si256 (),
7126 (__mmask8) __U);
7129 extern __inline __m128i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7133 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7134 (__v2di) __B,
7135 (__v2di)
7136 _mm_setzero_si128 (),
7137 (__mmask8) -1);
7140 extern __inline __m128i
7141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7143 __m128i __B)
7145 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7146 (__v2di) __B,
7147 (__v2di) __W,
7148 (__mmask8) __U);
7151 extern __inline __m128i
7152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7155 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7156 (__v2di) __B,
7157 (__v2di)
7158 _mm_setzero_si128 (),
7159 (__mmask8) __U);
7162 extern __inline __m256i
7163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7166 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7167 (__v4di) __B,
7168 (__v4di)
7169 _mm256_setzero_si256 (),
7170 (__mmask8) -1);
7173 extern __inline __m256i
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7176 __m256i __B)
7178 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7179 (__v4di) __B,
7180 (__v4di) __W,
7181 (__mmask8) __U);
7184 extern __inline __m256i
7185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7188 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7189 (__v4di) __B,
7190 (__v4di)
7191 _mm256_setzero_si256 (),
7192 (__mmask8) __U);
7195 extern __inline __m128i
7196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7199 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7200 (__v2di) __B,
7201 (__v2di)
7202 _mm_setzero_si128 (),
7203 (__mmask8) -1);
7206 extern __inline __m128i
7207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7209 __m128i __B)
7211 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7212 (__v2di) __B,
7213 (__v2di) __W,
7214 (__mmask8) __U);
7217 extern __inline __m128i
7218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7221 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7222 (__v2di) __B,
7223 (__v2di)
7224 _mm_setzero_si128 (),
7225 (__mmask8) __U);
7228 extern __inline __m256i
7229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7232 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7233 (__v4di) __Y,
7234 (__v4di)
7235 _mm256_setzero_si256 (),
7236 (__mmask8) -1);
7239 extern __inline __m256i
7240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7241 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7242 __m256i __Y)
7244 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7245 (__v4di) __Y,
7246 (__v4di) __W,
7247 (__mmask8) __U);
7250 extern __inline __m256i
7251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7254 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7255 (__v4di) __Y,
7256 (__v4di)
7257 _mm256_setzero_si256 (),
7258 (__mmask8) __U);
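/* Masked bitwise logic on 64-bit elements: and, andnot ((~__A) & __B),
   or and xor, together with unmasked or/xor helpers implemented with
   GNU vector extensions.  */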
7261 extern __inline __m256i
7262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7263 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7264 __m256i __B)
7266 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7267 (__v4di) __B,
7268 (__v4di) __W, __U);
7271 extern __inline __m256i
7272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7275 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7276 (__v4di) __B,
7277 (__v4di)
7278 _mm256_setzero_si256 (),
7279 __U);
7282 extern __inline __m128i
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7285 __m128i __B)
7287 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7288 (__v2di) __B,
7289 (__v2di) __W, __U);
7292 extern __inline __m128i
7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7296 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7297 (__v2di) __B,
7298 (__v2di)
7299 _mm_setzero_si128 (),
7300 __U);
7303 extern __inline __m256i
7304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7305 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7306 __m256i __B)
7308 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7309 (__v4di) __B,
7310 (__v4di) __W, __U);
7313 extern __inline __m256i
7314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7315 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7317 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7318 (__v4di) __B,
7319 (__v4di)
7320 _mm256_setzero_si256 (),
7321 __U);
7324 extern __inline __m128i
7325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7326 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7327 __m128i __B)
7329 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7330 (__v2di) __B,
7331 (__v2di) __W, __U);
7334 extern __inline __m128i
7335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7338 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7339 (__v2di) __B,
7340 (__v2di)
7341 _mm_setzero_si128 (),
7342 __U);
7345 extern __inline __m256i
7346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7347 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7348 __m256i __B)
7350 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7351 (__v4di) __B,
7352 (__v4di) __W,
7353 (__mmask8) __U);
7356 extern __inline __m256i
7357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7358 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7360 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7361 (__v4di) __B,
7362 (__v4di)
7363 _mm256_setzero_si256 (),
7364 (__mmask8) __U);
7367 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7368 _mm256_or_epi64 (__m256i __A, __m256i __B)
7370 return (__m256i) ((__v4du)__A | (__v4du)__B);
7373 extern __inline __m128i
7374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7377 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7378 (__v2di) __B,
7379 (__v2di) __W,
7380 (__mmask8) __U);
7383 extern __inline __m128i
7384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7385 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7387 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7388 (__v2di) __B,
7389 (__v2di)
7390 _mm_setzero_si128 (),
7391 (__mmask8) __U);
7394 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7395 _mm_or_epi64 (__m128i __A, __m128i __B)
7397 return (__m128i) ((__v2du)__A | (__v2du)__B);
7400 extern __inline __m256i
7401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7403 __m256i __B)
7405 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7406 (__v4di) __B,
7407 (__v4di) __W,
7408 (__mmask8) __U);
7411 extern __inline __m256i
7412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7415 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7416 (__v4di) __B,
7417 (__v4di)
7418 _mm256_setzero_si256 (),
7419 (__mmask8) __U);
7422 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7423 _mm256_xor_epi64 (__m256i __A, __m256i __B)
7425 return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7428 extern __inline __m128i
7429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7431 __m128i __B)
7433 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7434 (__v2di) __B,
7435 (__v2di) __W,
7436 (__mmask8) __U);
7439 extern __inline __m128i
7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7443 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7444 (__v2di) __B,
7445 (__v2di)
7446 _mm_setzero_si128 (),
7447 (__mmask8) __U);
7450 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7451 _mm_xor_epi64 (__m128i __A, __m128i __B)
7453 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
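/* Masked floating-point max, min, div and mul.  The mask forms take
   inactive lanes from __W; the maskz forms zero them.  */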
7456 extern __inline __m256d
7457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7459 __m256d __B)
7461 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7462 (__v4df) __B,
7463 (__v4df) __W,
7464 (__mmask8) __U);
7467 extern __inline __m256d
7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7471 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7472 (__v4df) __B,
7473 (__v4df)
7474 _mm256_setzero_pd (),
7475 (__mmask8) __U);
7478 extern __inline __m256
7479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7482 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7483 (__v8sf) __B,
7484 (__v8sf) __W,
7485 (__mmask8) __U);
7488 extern __inline __m256
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7492 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7493 (__v8sf) __B,
7494 (__v8sf)
7495 _mm256_setzero_ps (),
7496 (__mmask8) __U);
7499 extern __inline __m128
7500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7503 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7504 (__v4sf) __B,
7505 (__v4sf) __W,
7506 (__mmask8) __U);
7509 extern __inline __m128
7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7513 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7514 (__v4sf) __B,
7515 (__v4sf)
7516 _mm_setzero_ps (),
7517 (__mmask8) __U);
7520 extern __inline __m128d
7521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7522 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7524 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7525 (__v2df) __B,
7526 (__v2df) __W,
7527 (__mmask8) __U);
7530 extern __inline __m128d
7531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7534 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7535 (__v2df) __B,
7536 (__v2df)
7537 _mm_setzero_pd (),
7538 (__mmask8) __U);
7541 extern __inline __m256d
7542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7543 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7544 __m256d __B)
7546 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7547 (__v4df) __B,
7548 (__v4df) __W,
7549 (__mmask8) __U);
7552 extern __inline __m256d
7553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7554 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7555 __m256d __B)
7557 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7558 (__v4df) __B,
7559 (__v4df) __W,
7560 (__mmask8) __U);
7563 extern __inline __m256d
7564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7567 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7568 (__v4df) __B,
7569 (__v4df)
7570 _mm256_setzero_pd (),
7571 (__mmask8) __U);
7574 extern __inline __m256
7575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7578 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7579 (__v8sf) __B,
7580 (__v8sf) __W,
7581 (__mmask8) __U);
7584 extern __inline __m256d
7585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7586 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7588 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7589 (__v4df) __B,
7590 (__v4df)
7591 _mm256_setzero_pd (),
7592 (__mmask8) __U);
7595 extern __inline __m256
7596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7599 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7600 (__v8sf) __B,
7601 (__v8sf) __W,
7602 (__mmask8) __U);
7605 extern __inline __m256
7606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7609 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7610 (__v8sf) __B,
7611 (__v8sf)
7612 _mm256_setzero_ps (),
7613 (__mmask8) __U);
7616 extern __inline __m256
7617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7620 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7621 (__v8sf) __B,
7622 (__v8sf)
7623 _mm256_setzero_ps (),
7624 (__mmask8) __U);
7627 extern __inline __m128
7628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7631 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7632 (__v4sf) __B,
7633 (__v4sf) __W,
7634 (__mmask8) __U);
7637 extern __inline __m128
7638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7641 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7642 (__v4sf) __B,
7643 (__v4sf) __W,
7644 (__mmask8) __U);
7647 extern __inline __m128
7648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7649 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7651 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7652 (__v4sf) __B,
7653 (__v4sf)
7654 _mm_setzero_ps (),
7655 (__mmask8) __U);
7658 extern __inline __m128
7659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7662 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7663 (__v4sf) __B,
7664 (__v4sf)
7665 _mm_setzero_ps (),
7666 (__mmask8) __U);
7669 extern __inline __m128
7670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7671 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7673 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7674 (__v4sf) __B,
7675 (__v4sf) __W,
7676 (__mmask8) __U);
7679 extern __inline __m128
7680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7681 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7683 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7684 (__v4sf) __B,
7685 (__v4sf)
7686 _mm_setzero_ps (),
7687 (__mmask8) __U);
7690 extern __inline __m128d
7691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7692 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7694 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7695 (__v2df) __B,
7696 (__v2df) __W,
7697 (__mmask8) __U);
7700 extern __inline __m128d
7701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7702 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7704 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7705 (__v2df) __B,
7706 (__v2df)
7707 _mm_setzero_pd (),
7708 (__mmask8) __U);
7711 extern __inline __m128d
7712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7713 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7715 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7716 (__v2df) __B,
7717 (__v2df) __W,
7718 (__mmask8) __U);
7721 extern __inline __m128d
7722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7725 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7726 (__v2df) __B,
7727 (__v2df)
7728 _mm_setzero_pd (),
7729 (__mmask8) __U);
7732 extern __inline __m128d
7733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7734 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7736 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7737 (__v2df) __B,
7738 (__v2df) __W,
7739 (__mmask8) __U);
7742 extern __inline __m128d
7743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7746 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7747 (__v2df) __B,
7748 (__v2df)
7749 _mm_setzero_pd (),
7750 (__mmask8) __U);
7753 extern __inline __m256
7754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7757 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7758 (__v8sf) __B,
7759 (__v8sf) __W,
7760 (__mmask8) __U);
7763 extern __inline __m256
7764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7767 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7768 (__v8sf) __B,
7769 (__v8sf)
7770 _mm256_setzero_ps (),
7771 (__mmask8) __U);
7774 extern __inline __m256d
7775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7776 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7777 __m256d __B)
7779 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7780 (__v4df) __B,
7781 (__v4df) __W,
7782 (__mmask8) __U);
7785 extern __inline __m256d
7786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7789 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7790 (__v4df) __B,
7791 (__v4df)
7792 _mm256_setzero_pd (),
7793 (__mmask8) __U);
7796 extern __inline __m256i
7797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7798 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7800 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7801 (__v4di) __B,
7802 (__v4di)
7803 _mm256_setzero_si256 (),
7804 __M);
7807 extern __inline __m256i
7808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7809 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7810 __m256i __B)
7812 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7813 (__v4di) __B,
7814 (__v4di) __W, __M);
7817 extern __inline __m256i
7818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7819 _mm256_min_epi64 (__m256i __A, __m256i __B)
7821 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7822 (__v4di) __B,
7823 (__v4di)
7824 _mm256_setzero_si256 (),
7825 (__mmask8) -1);
7828 extern __inline __m256i
7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7831 __m256i __B)
7833 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7834 (__v4di) __B,
7835 (__v4di) __W, __M);
7838 extern __inline __m256i
7839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7842 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7843 (__v4di) __B,
7844 (__v4di)
7845 _mm256_setzero_si256 (),
7846 __M);
7849 extern __inline __m256i
7850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7853 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7854 (__v4di) __B,
7855 (__v4di)
7856 _mm256_setzero_si256 (),
7857 __M);
7860 extern __inline __m256i
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm256_max_epi64 (__m256i __A, __m256i __B)
7864 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7865 (__v4di) __B,
7866 (__v4di)
7867 _mm256_setzero_si256 (),
7868 (__mmask8) -1);
7871 extern __inline __m256i
7872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7873 _mm256_max_epu64 (__m256i __A, __m256i __B)
7875 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7876 (__v4di) __B,
7877 (__v4di)
7878 _mm256_setzero_si256 (),
7879 (__mmask8) -1);
7882 extern __inline __m256i
7883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7885 __m256i __B)
7887 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7888 (__v4di) __B,
7889 (__v4di) __W, __M);
7892 extern __inline __m256i
7893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894 _mm256_min_epu64 (__m256i __A, __m256i __B)
7896 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7897 (__v4di) __B,
7898 (__v4di)
7899 _mm256_setzero_si256 (),
7900 (__mmask8) -1);
7903 extern __inline __m256i
7904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7906 __m256i __B)
7908 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7909 (__v4di) __B,
7910 (__v4di) __W, __M);
7913 extern __inline __m256i
7914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7915 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7917 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7918 (__v4di) __B,
7919 (__v4di)
7920 _mm256_setzero_si256 (),
7921 __M);
7924 extern __inline __m256i
7925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7928 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7929 (__v8si) __B,
7930 (__v8si)
7931 _mm256_setzero_si256 (),
7932 __M);
7935 extern __inline __m256i
7936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7938 __m256i __B)
7940 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7941 (__v8si) __B,
7942 (__v8si) __W, __M);
7945 extern __inline __m256i
7946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7947 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7949 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7950 (__v8si) __B,
7951 (__v8si)
7952 _mm256_setzero_si256 (),
7953 __M);
7956 extern __inline __m256i
7957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7958 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7959 __m256i __B)
7961 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7962 (__v8si) __B,
7963 (__v8si) __W, __M);
7966 extern __inline __m256i
7967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7968 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7970 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7971 (__v8si) __B,
7972 (__v8si)
7973 _mm256_setzero_si256 (),
7974 __M);
7977 extern __inline __m256i
7978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7979 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7980 __m256i __B)
7982 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7983 (__v8si) __B,
7984 (__v8si) __W, __M);
7987 extern __inline __m256i
7988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7989 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7991 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7992 (__v8si) __B,
7993 (__v8si)
7994 _mm256_setzero_si256 (),
7995 __M);
7998 extern __inline __m256i
7999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8001 __m256i __B)
8003 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8004 (__v8si) __B,
8005 (__v8si) __W, __M);
8008 extern __inline __m128i
8009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8012 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8013 (__v2di) __B,
8014 (__v2di)
8015 _mm_setzero_si128 (),
8016 __M);
8019 extern __inline __m128i
8020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8021 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8022 __m128i __B)
8024 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8025 (__v2di) __B,
8026 (__v2di) __W, __M);
8029 extern __inline __m128i
8030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031 _mm_min_epi64 (__m128i __A, __m128i __B)
8033 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8034 (__v2di) __B,
8035 (__v2di)
8036 _mm_setzero_si128 (),
8037 (__mmask8) -1);
8040 extern __inline __m128i
8041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8042 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8043 __m128i __B)
8045 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8046 (__v2di) __B,
8047 (__v2di) __W, __M);
8050 extern __inline __m128i
8051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8052 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8054 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8055 (__v2di) __B,
8056 (__v2di)
8057 _mm_setzero_si128 (),
8058 __M);
8061 extern __inline __m128i
8062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8065 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8066 (__v2di) __B,
8067 (__v2di)
8068 _mm_setzero_si128 (),
8069 __M);
8072 extern __inline __m128i
8073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8074 _mm_max_epi64 (__m128i __A, __m128i __B)
8076 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8077 (__v2di) __B,
8078 (__v2di)
8079 _mm_setzero_si128 (),
8080 (__mmask8) -1);
8083 extern __inline __m128i
8084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8085 _mm_max_epu64 (__m128i __A, __m128i __B)
8087 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8088 (__v2di) __B,
8089 (__v2di)
8090 _mm_setzero_si128 (),
8091 (__mmask8) -1);
8094 extern __inline __m128i
8095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8097 __m128i __B)
8099 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8100 (__v2di) __B,
8101 (__v2di) __W, __M);
8104 extern __inline __m128i
8105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8106 _mm_min_epu64 (__m128i __A, __m128i __B)
8108 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8109 (__v2di) __B,
8110 (__v2di)
8111 _mm_setzero_si128 (),
8112 (__mmask8) -1);
8115 extern __inline __m128i
8116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8118 __m128i __B)
8120 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8121 (__v2di) __B,
8122 (__v2di) __W, __M);
8125 extern __inline __m128i
8126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8129 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8130 (__v2di) __B,
8131 (__v2di)
8132 _mm_setzero_si128 (),
8133 __M);
8136 extern __inline __m128i
8137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8138 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8140 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8141 (__v4si) __B,
8142 (__v4si)
8143 _mm_setzero_si128 (),
8144 __M);
8147 extern __inline __m128i
8148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8149 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8150 __m128i __B)
8152 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8153 (__v4si) __B,
8154 (__v4si) __W, __M);
8157 extern __inline __m128i
8158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8159 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8161 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8162 (__v4si) __B,
8163 (__v4si)
8164 _mm_setzero_si128 (),
8165 __M);
8168 extern __inline __m128i
8169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8171 __m128i __B)
8173 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8174 (__v4si) __B,
8175 (__v4si) __W, __M);
8178 extern __inline __m128i
8179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8182 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8183 (__v4si) __B,
8184 (__v4si)
8185 _mm_setzero_si128 (),
8186 __M);
8189 extern __inline __m128i
8190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8192 __m128i __B)
8194 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8195 (__v4si) __B,
8196 (__v4si) __W, __M);
8199 extern __inline __m128i
8200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8203 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8204 (__v4si) __B,
8205 (__v4si)
8206 _mm_setzero_si128 (),
8207 __M);
8210 extern __inline __m128i
8211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8213 __m128i __B)
8215 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8216 (__v4si) __B,
8217 (__v4si) __W, __M);
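/* The intrinsics below additionally require the AVX512CD instruction
   set (conflict detection, leading-zero count and mask broadcast); the
   pragma that follows enables avx512vl and avx512cd together when
   AVX512CD is not already active.  */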
8220 #ifndef __AVX512CD__
8221 #pragma GCC push_options
8222 #pragma GCC target("avx512vl,avx512cd")
8223 #define __DISABLE_AVX512VLCD__
8224 #endif
8226 extern __inline __m128i
8227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228 _mm_broadcastmb_epi64 (__mmask8 __A)
8230 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8233 extern __inline __m256i
8234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8235 _mm256_broadcastmb_epi64 (__mmask8 __A)
8237 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8240 extern __inline __m128i
8241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8242 _mm_broadcastmw_epi32 (__mmask16 __A)
8244 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8247 extern __inline __m256i
8248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249 _mm256_broadcastmw_epi32 (__mmask16 __A)
8251 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
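/* Illustrative usage sketch, not part of this header's interface.
   It assumes the translation unit includes <immintrin.h> and is built
   with -mavx512vl -mavx512cd (or an equivalent target attribute).

     __mmask8 k = 0xA5;
     __m256i q = _mm256_broadcastmb_epi64 (k);   // each 64-bit lane holds 0xA5
     __m128i d = _mm_broadcastmw_epi32 (0x3);    // each 32-bit lane holds 0x3
   */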
8254 extern __inline __m256i
8255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256 _mm256_lzcnt_epi32 (__m256i __A)
8258 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8259 (__v8si)
8260 _mm256_setzero_si256 (),
8261 (__mmask8) -1);
8264 extern __inline __m256i
8265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8268 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8269 (__v8si) __W,
8270 (__mmask8) __U);
8273 extern __inline __m256i
8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8277 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8278 (__v8si)
8279 _mm256_setzero_si256 (),
8280 (__mmask8) __U);
8283 extern __inline __m256i
8284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8285 _mm256_lzcnt_epi64 (__m256i __A)
8287 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8288 (__v4di)
8289 _mm256_setzero_si256 (),
8290 (__mmask8) -1);
8293 extern __inline __m256i
8294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8295 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8297 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8298 (__v4di) __W,
8299 (__mmask8) __U);
8302 extern __inline __m256i
8303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8306 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8307 (__v4di)
8308 _mm256_setzero_si256 (),
8309 (__mmask8) __U);
8312 extern __inline __m256i
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm256_conflict_epi64 (__m256i __A)
8316 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8317 (__v4di)
8318 _mm256_setzero_si256 (),
8319 (__mmask8) -1);
8322 extern __inline __m256i
8323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8326 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8327 (__v4di) __W,
8328 (__mmask8)
8329 __U);
8332 extern __inline __m256i
8333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8336 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8337 (__v4di)
8338 _mm256_setzero_si256 (),
8339 (__mmask8)
8340 __U);
8343 extern __inline __m256i
8344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345 _mm256_conflict_epi32 (__m256i __A)
8347 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8348 (__v8si)
8349 _mm256_setzero_si256 (),
8350 (__mmask8) -1);
8353 extern __inline __m256i
8354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8357 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8358 (__v8si) __W,
8359 (__mmask8)
8360 __U);
8363 extern __inline __m256i
8364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8367 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8368 (__v8si)
8369 _mm256_setzero_si256 (),
8370 (__mmask8)
8371 __U);
8374 extern __inline __m128i
8375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376 _mm_lzcnt_epi32 (__m128i __A)
8378 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8379 (__v4si)
8380 _mm_setzero_si128 (),
8381 (__mmask8) -1);
8384 extern __inline __m128i
8385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8388 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8389 (__v4si) __W,
8390 (__mmask8) __U);
8393 extern __inline __m128i
8394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8395 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8397 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8398 (__v4si)
8399 _mm_setzero_si128 (),
8400 (__mmask8) __U);
8403 extern __inline __m128i
8404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8405 _mm_lzcnt_epi64 (__m128i __A)
8407 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8408 (__v2di)
8409 _mm_setzero_si128 (),
8410 (__mmask8) -1);
8413 extern __inline __m128i
8414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8415 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8417 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8418 (__v2di) __W,
8419 (__mmask8) __U);
8422 extern __inline __m128i
8423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8426 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8427 (__v2di)
8428 _mm_setzero_si128 (),
8429 (__mmask8) __U);
8432 extern __inline __m128i
8433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8434 _mm_conflict_epi64 (__m128i __A)
8436 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8437 (__v2di)
8438 _mm_setzero_si128 (),
8439 (__mmask8) -1);
8442 extern __inline __m128i
8443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8444 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8446 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8447 (__v2di) __W,
8448 (__mmask8)
8449 __U);
8452 extern __inline __m128i
8453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8454 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8456 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8457 (__v2di)
8458 _mm_setzero_si128 (),
8459 (__mmask8)
8460 __U);
8463 extern __inline __m128i
8464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465 _mm_conflict_epi32 (__m128i __A)
8467 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8468 (__v4si)
8469 _mm_setzero_si128 (),
8470 (__mmask8) -1);
8473 extern __inline __m128i
8474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8477 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8478 (__v4si) __W,
8479 (__mmask8)
8480 __U);
8483 extern __inline __m128i
8484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8487 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8488 (__v4si)
8489 _mm_setzero_si128 (),
8490 (__mmask8)
8491 __U);
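/* vplzcnt{d,q} return the per-element count of leading zero bits;
   vpconflict{d,q} return, for each element, a bit mask in which bit j
   is set when the element at the lower index j holds the same value.  */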
8494 #ifdef __DISABLE_AVX512VLCD__
8495 #pragma GCC pop_options
8496 #endif
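/* Masked unpack (interleave low/high) operations and masked
   half-precision to single-precision conversions follow.  */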
8498 extern __inline __m256d
8499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8501 __m256d __B)
8503 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8504 (__v4df) __B,
8505 (__v4df) __W,
8506 (__mmask8) __U);
8509 extern __inline __m256d
8510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8513 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8514 (__v4df) __B,
8515 (__v4df)
8516 _mm256_setzero_pd (),
8517 (__mmask8) __U);
8520 extern __inline __m128d
8521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8523 __m128d __B)
8525 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8526 (__v2df) __B,
8527 (__v2df) __W,
8528 (__mmask8) __U);
8531 extern __inline __m128d
8532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8533 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8535 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8536 (__v2df) __B,
8537 (__v2df)
8538 _mm_setzero_pd (),
8539 (__mmask8) __U);
8542 extern __inline __m256
8543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8545 __m256 __B)
8547 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8548 (__v8sf) __B,
8549 (__v8sf) __W,
8550 (__mmask8) __U);
8553 extern __inline __m256d
8554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8556 __m256d __B)
8558 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8559 (__v4df) __B,
8560 (__v4df) __W,
8561 (__mmask8) __U);
8564 extern __inline __m256d
8565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8568 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8569 (__v4df) __B,
8570 (__v4df)
8571 _mm256_setzero_pd (),
8572 (__mmask8) __U);
8575 extern __inline __m128d
8576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8578 __m128d __B)
8580 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8581 (__v2df) __B,
8582 (__v2df) __W,
8583 (__mmask8) __U);
8586 extern __inline __m128d
8587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8588 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8590 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8591 (__v2df) __B,
8592 (__v2df)
8593 _mm_setzero_pd (),
8594 (__mmask8) __U);
8597 extern __inline __m256
8598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8599 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8600 __m256 __B)
8602 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8603 (__v8sf) __B,
8604 (__v8sf) __W,
8605 (__mmask8) __U);
8608 extern __inline __m256
8609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8610 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8612 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8613 (__v8sf) __B,
8614 (__v8sf)
8615 _mm256_setzero_ps (),
8616 (__mmask8) __U);
8619 extern __inline __m128
8620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8623 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8624 (__v4sf) __B,
8625 (__v4sf) __W,
8626 (__mmask8) __U);
8629 extern __inline __m128
8630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8633 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8634 (__v4sf) __B,
8635 (__v4sf)
8636 _mm_setzero_ps (),
8637 (__mmask8) __U);
8640 extern __inline __m128
8641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8644 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8645 (__v4sf) __W,
8646 (__mmask8) __U);
8649 extern __inline __m128
8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8653 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8654 (__v4sf)
8655 _mm_setzero_ps (),
8656 (__mmask8) __U);
8659 extern __inline __m256
8660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8661 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8663 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8664 (__v8sf) __B,
8665 (__v8sf)
8666 _mm256_setzero_ps (),
8667 (__mmask8) __U);
8670 extern __inline __m256
8671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8674 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8675 (__v8sf) __W,
8676 (__mmask8) __U);
8679 extern __inline __m256
8680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8683 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8684 (__v8sf)
8685 _mm256_setzero_ps (),
8686 (__mmask8) __U);
8689 extern __inline __m128
8690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8691 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8693 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8694 (__v4sf) __B,
8695 (__v4sf) __W,
8696 (__mmask8) __U);
8699 extern __inline __m128
8700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8701 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8703 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8704 (__v4sf) __B,
8705 (__v4sf)
8706 _mm_setzero_ps (),
8707 (__mmask8) __U);
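/* Masked shift operations.  As with the legacy SSE2/AVX2 forms, the
   shift count is taken from the low 64 bits of the 128-bit count
   operand.  */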
8710 extern __inline __m256i
8711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8713 __m128i __B)
8715 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8716 (__v4si) __B,
8717 (__v8si) __W,
8718 (__mmask8) __U);
8721 extern __inline __m256i
8722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8723 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8725 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8726 (__v4si) __B,
8727 (__v8si)
8728 _mm256_setzero_si256 (),
8729 (__mmask8) __U);
8732 extern __inline __m128i
8733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8734 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8735 __m128i __B)
8737 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8738 (__v4si) __B,
8739 (__v4si) __W,
8740 (__mmask8) __U);
8743 extern __inline __m128i
8744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8745 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8747 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8748 (__v4si) __B,
8749 (__v4si)
8750 _mm_setzero_si128 (),
8751 (__mmask8) __U);
8754 extern __inline __m256i
8755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8756 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8758 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8759 (__v2di) __B,
8760 (__v4di)
8761 _mm256_setzero_si256 (),
8762 (__mmask8) -1);
8765 extern __inline __m256i
8766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8768 __m128i __B)
8770 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8771 (__v2di) __B,
8772 (__v4di) __W,
8773 (__mmask8) __U);
8776 extern __inline __m256i
8777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8778 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8780 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8781 (__v2di) __B,
8782 (__v4di)
8783 _mm256_setzero_si256 (),
8784 (__mmask8) __U);
8787 extern __inline __m128i
8788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789 _mm_sra_epi64 (__m128i __A, __m128i __B)
8791 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8792 (__v2di) __B,
8793 (__v2di)
8794 _mm_setzero_si128 (),
8795 (__mmask8) -1);
8798 extern __inline __m128i
8799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8801 __m128i __B)
8803 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8804 (__v2di) __B,
8805 (__v2di) __W,
8806 (__mmask8) __U);
8809 extern __inline __m128i
8810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8813 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8814 (__v2di) __B,
8815 (__v2di)
8816 _mm_setzero_si128 (),
8817 (__mmask8) __U);
8820 extern __inline __m128i
8821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8822 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8823 __m128i __B)
8825 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8826 (__v4si) __B,
8827 (__v4si) __W,
8828 (__mmask8) __U);
8831 extern __inline __m128i
8832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8833 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8835 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8836 (__v4si) __B,
8837 (__v4si)
8838 _mm_setzero_si128 (),
8839 (__mmask8) __U);
8842 extern __inline __m128i
8843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8844 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8845 __m128i __B)
8847 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8848 (__v2di) __B,
8849 (__v2di) __W,
8850 (__mmask8) __U);
8853 extern __inline __m128i
8854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8857 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8858 (__v2di) __B,
8859 (__v2di)
8860 _mm_setzero_si128 (),
8861 (__mmask8) __U);
8864 extern __inline __m256i
8865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8867 __m128i __B)
8869 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8870 (__v4si) __B,
8871 (__v8si) __W,
8872 (__mmask8) __U);
8875 extern __inline __m256i
8876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8877 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8879 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8880 (__v4si) __B,
8881 (__v8si)
8882 _mm256_setzero_si256 (),
8883 (__mmask8) __U);
8886 extern __inline __m256i
8887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8889 __m128i __B)
8891 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8892 (__v2di) __B,
8893 (__v4di) __W,
8894 (__mmask8) __U);
8897 extern __inline __m256i
8898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8899 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8901 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8902 (__v2di) __B,
8903 (__v4di)
8904 _mm256_setzero_si256 (),
8905 (__mmask8) __U);
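/* Masked variable permutes.  For the permutexvar forms the intrinsic
   takes the index vector first and the data vector second, while the
   underlying builtin receives the data vector first; the permutevar
   (vpermilps/vpermilpd) forms take the data vector first and a
   per-lane control vector second.  */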
8908 extern __inline __m256
8909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8911 __m256 __Y)
8913 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8914 (__v8si) __X,
8915 (__v8sf) __W,
8916 (__mmask8) __U);
8919 extern __inline __m256
8920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8921 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8923 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8924 (__v8si) __X,
8925 (__v8sf)
8926 _mm256_setzero_ps (),
8927 (__mmask8) __U);
8930 extern __inline __m256d
8931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8934 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8935 (__v4di) __X,
8936 (__v4df)
8937 _mm256_setzero_pd (),
8938 (__mmask8) -1);
8941 extern __inline __m256d
8942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8944 __m256d __Y)
8946 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8947 (__v4di) __X,
8948 (__v4df) __W,
8949 (__mmask8) __U);
8952 extern __inline __m256d
8953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8956 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8957 (__v4di) __X,
8958 (__v4df)
8959 _mm256_setzero_pd (),
8960 (__mmask8) __U);
8963 extern __inline __m256d
8964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8965 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8966 __m256i __C)
8968 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8969 (__v4di) __C,
8970 (__v4df) __W,
8971 (__mmask8)
8972 __U);
8975 extern __inline __m256d
8976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8977 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8979 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8980 (__v4di) __C,
8981 (__v4df)
8982 _mm256_setzero_pd (),
8983 (__mmask8)
8984 __U);
8987 extern __inline __m256
8988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8989 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8990 __m256i __C)
8992 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8993 (__v8si) __C,
8994 (__v8sf) __W,
8995 (__mmask8) __U);
8998 extern __inline __m256
8999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9000 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9002 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9003 (__v8si) __C,
9004 (__v8sf)
9005 _mm256_setzero_ps (),
9006 (__mmask8) __U);
9009 extern __inline __m128d
9010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9012 __m128i __C)
9014 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9015 (__v2di) __C,
9016 (__v2df) __W,
9017 (__mmask8) __U);
9020 extern __inline __m128d
9021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9022 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9024 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9025 (__v2di) __C,
9026 (__v2df)
9027 _mm_setzero_pd (),
9028 (__mmask8) __U);
9031 extern __inline __m128
9032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9033 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9034 __m128i __C)
9036 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9037 (__v4si) __C,
9038 (__v4sf) __W,
9039 (__mmask8) __U);
9042 extern __inline __m128
9043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9044 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9046 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9047 (__v4si) __C,
9048 (__v4sf)
9049 _mm_setzero_ps (),
9050 (__mmask8) __U);
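/* Masked 32-bit multiplies.  pmulld keeps the low 32 bits of each
   product, while pmuldq/pmuludq multiply the even-indexed (low dword
   of each 64-bit lane) signed/unsigned elements and produce full
   64-bit products.  */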
9053 extern __inline __m256i
9054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9057 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9058 (__v8si) __B,
9059 (__v8si)
9060 _mm256_setzero_si256 (),
9061 __M);
9064 extern __inline __m256i
9065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9068 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9069 (__v4di) __X,
9070 (__v4di)
9071 _mm256_setzero_si256 (),
9072 __M);
9075 extern __inline __m256i
9076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9077 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9078 __m256i __B)
9080 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9081 (__v8si) __B,
9082 (__v8si) __W, __M);
9085 extern __inline __m128i
9086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9087 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9089 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9090 (__v4si) __B,
9091 (__v4si)
9092 _mm_setzero_si128 (),
9093 __M);
9096 extern __inline __m128i
9097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098 _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
9099 __m128i __B)
9101 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9102 (__v4si) __B,
9103 (__v4si) __W, __M);
9106 extern __inline __m256i
9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 __m256i __Y)
9111 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9112 (__v8si) __Y,
9113 (__v4di) __W, __M);
9116 extern __inline __m256i
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9120 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9121 (__v8si) __Y,
9122 (__v4di)
9123 _mm256_setzero_si256 (),
9124 __M);
9127 extern __inline __m128i
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9130 __m128i __Y)
9132 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9133 (__v4si) __Y,
9134 (__v2di) __W, __M);
9137 extern __inline __m128i
9138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9139 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9141 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9142 (__v4si) __Y,
9143 (__v2di)
9144 _mm_setzero_si128 (),
9145 __M);
9148 extern __inline __m256i
9149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150 _mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9152 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9153 (__v4di) __X,
9154 (__v4di)
9155 _mm256_setzero_si256 (),
9156 (__mmask8) -1);
9159 extern __inline __m256i
9160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9162 __m256i __Y)
9164 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9165 (__v4di) __X,
9166 (__v4di) __W,
9167 __M);
9170 extern __inline __m256i
9171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9172 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9173 __m256i __Y)
9175 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9176 (__v8si) __Y,
9177 (__v4di) __W, __M);
9180 extern __inline __m256i
9181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9182 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9184 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9185 (__v8si) __X,
9186 (__v8si)
9187 _mm256_setzero_si256 (),
9188 __M);
9191 extern __inline __m256i
9192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9193 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9195 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9196 (__v8si) __Y,
9197 (__v4di)
9198 _mm256_setzero_si256 (),
9199 __M);
9202 extern __inline __m128i
9203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9204 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9205 __m128i __Y)
9207 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9208 (__v4si) __Y,
9209 (__v2di) __W, __M);
9212 extern __inline __m128i
9213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9214 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9216 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9217 (__v4si) __Y,
9218 (__v2di)
9219 _mm_setzero_si128 (),
9220 __M);
9223 extern __inline __m256i
9224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9227 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9228 (__v8si) __X,
9229 (__v8si)
9230 _mm256_setzero_si256 (),
9231 (__mmask8) -1);
9234 extern __inline __m256i
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9237 __m256i __Y)
9239 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9240 (__v8si) __X,
9241 (__v8si) __W,
9242 __M);
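/* Masked integer comparisons.  The third argument of the
   __builtin_ia32_[u]cmp* builtins is the VPCMP predicate immediate:
   0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = NLT (>=), 6 = NLE (>).  The
   ucmp builtins perform the unsigned comparison.  */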
9245 extern __inline __mmask8
9246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9247 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9249 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9250 (__v8si) __Y, 4,
9251 (__mmask8) __M);
9254 extern __inline __mmask8
9255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
9258 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9259 (__v8si) __Y, 4,
9260 (__mmask8) -1);
9263 extern __inline __mmask8
9264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9265 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9267 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9268 (__v8si) __Y, 1,
9269 (__mmask8) __M);
9272 extern __inline __mmask8
9273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9274 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
9276 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9277 (__v8si) __Y, 1,
9278 (__mmask8) -1);
9281 extern __inline __mmask8
9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9285 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9286 (__v8si) __Y, 5,
9287 (__mmask8) __M);
9290 extern __inline __mmask8
9291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9292 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
9294 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9295 (__v8si) __Y, 5,
9296 (__mmask8) -1);
9299 extern __inline __mmask8
9300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9301 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9303 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9304 (__v8si) __Y, 2,
9305 (__mmask8) __M);
9308 extern __inline __mmask8
9309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9310 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
9312 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9313 (__v8si) __Y, 2,
9314 (__mmask8) -1);
9317 extern __inline __mmask8
9318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9321 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9322 (__v4di) __Y, 4,
9323 (__mmask8) __M);
9326 extern __inline __mmask8
9327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9328 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
9330 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9331 (__v4di) __Y, 4,
9332 (__mmask8) -1);
9335 extern __inline __mmask8
9336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9337 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9339 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9340 (__v4di) __Y, 1,
9341 (__mmask8) __M);
9344 extern __inline __mmask8
9345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9346 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
9348 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9349 (__v4di) __Y, 1,
9350 (__mmask8) -1);
9353 extern __inline __mmask8
9354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9355 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9357 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9358 (__v4di) __Y, 5,
9359 (__mmask8) __M);
9362 extern __inline __mmask8
9363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
9366 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9367 (__v4di) __Y, 5,
9368 (__mmask8) -1);
9371 extern __inline __mmask8
9372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9373 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9375 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9376 (__v4di) __Y, 2,
9377 (__mmask8) __M);
9380 extern __inline __mmask8
9381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9382 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
9384 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9385 (__v4di) __Y, 2,
9386 (__mmask8) -1);
9389 extern __inline __mmask8
9390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9391 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9393 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9394 (__v8si) __Y, 4,
9395 (__mmask8) __M);
9398 extern __inline __mmask8
9399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9402 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9403 (__v8si) __Y, 4,
9404 (__mmask8) -1);
9407 extern __inline __mmask8
9408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9411 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9412 (__v8si) __Y, 1,
9413 (__mmask8) __M);
9416 extern __inline __mmask8
9417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
9420 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9421 (__v8si) __Y, 1,
9422 (__mmask8) -1);
9425 extern __inline __mmask8
9426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9429 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9430 (__v8si) __Y, 5,
9431 (__mmask8) __M);
9434 extern __inline __mmask8
9435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
9438 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9439 (__v8si) __Y, 5,
9440 (__mmask8) -1);
9443 extern __inline __mmask8
9444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9445 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9447 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9448 (__v8si) __Y, 2,
9449 (__mmask8) __M);
9452 extern __inline __mmask8
9453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9454 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
9456 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9457 (__v8si) __Y, 2,
9458 (__mmask8) -1);
9461 extern __inline __mmask8
9462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9463 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9465 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9466 (__v4di) __Y, 4,
9467 (__mmask8) __M);
9470 extern __inline __mmask8
9471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9472 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
9474 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9475 (__v4di) __Y, 4,
9476 (__mmask8) -1);
9479 extern __inline __mmask8
9480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9481 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9483 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9484 (__v4di) __Y, 1,
9485 (__mmask8) __M);
9488 extern __inline __mmask8
9489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9490 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
9492 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9493 (__v4di) __Y, 1,
9494 (__mmask8) -1);
9497 extern __inline __mmask8
9498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9501 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9502 (__v4di) __Y, 5,
9503 (__mmask8) __M);
9506 extern __inline __mmask8
9507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9508 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
9510 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9511 (__v4di) __Y, 5,
9512 (__mmask8) -1);
9515 extern __inline __mmask8
9516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9517 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9519 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9520 (__v4di) __Y, 2,
9521 (__mmask8) __M);
9524 extern __inline __mmask8
9525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9526 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
9528 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9529 (__v4di) __Y, 2,
9530 (__mmask8) -1);
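/* Illustrative usage sketch (assumes <immintrin.h> and -mavx512vl):
   a comparison result is an ordinary bit mask, so it can be used
   directly as a bitmap, e.g. to count how many 32-bit lanes of a are
   below the matching lane of b:

     __mmask8 k = _mm256_cmplt_epu32_mask (a, b);
     int n = __builtin_popcount ((unsigned) k);   // GCC builtin

   */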
9533 extern __inline __mmask8
9534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9535 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9537 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9538 (__v4si) __Y, 4,
9539 (__mmask8) __M);
9542 extern __inline __mmask8
9543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9544 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
9546 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9547 (__v4si) __Y, 4,
9548 (__mmask8) -1);
9551 extern __inline __mmask8
9552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9553 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9555 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9556 (__v4si) __Y, 1,
9557 (__mmask8) __M);
9560 extern __inline __mmask8
9561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9562 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
9564 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9565 (__v4si) __Y, 1,
9566 (__mmask8) -1);
9569 extern __inline __mmask8
9570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9571 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9573 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9574 (__v4si) __Y, 5,
9575 (__mmask8) __M);
9578 extern __inline __mmask8
9579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9580 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
9582 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9583 (__v4si) __Y, 5,
9584 (__mmask8) -1);
9587 extern __inline __mmask8
9588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9589 _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9591 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9592 (__v4si) __Y, 2,
9593 (__mmask8) __M);
9596 extern __inline __mmask8
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
9600 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9601 (__v4si) __Y, 2,
9602 (__mmask8) -1);
9605 extern __inline __mmask8
9606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9607 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9609 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9610 (__v2di) __Y, 4,
9611 (__mmask8) __M);
9614 extern __inline __mmask8
9615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9616 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
9618 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9619 (__v2di) __Y, 4,
9620 (__mmask8) -1);
9623 extern __inline __mmask8
9624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9625 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9627 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9628 (__v2di) __Y, 1,
9629 (__mmask8) __M);
9632 extern __inline __mmask8
9633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
9636 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9637 (__v2di) __Y, 1,
9638 (__mmask8) -1);
9641 extern __inline __mmask8
9642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9643 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9645 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9646 (__v2di) __Y, 5,
9647 (__mmask8) __M);
9650 extern __inline __mmask8
9651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9652 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
9654 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9655 (__v2di) __Y, 5,
9656 (__mmask8) -1);
9659 extern __inline __mmask8
9660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9661 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9663 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9664 (__v2di) __Y, 2,
9665 (__mmask8) __M);
9668 extern __inline __mmask8
9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
9672 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9673 (__v2di) __Y, 2,
9674 (__mmask8) -1);
9677 extern __inline __mmask8
9678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9679 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9681 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9682 (__v4si) __Y, 4,
9683 (__mmask8) __M);
9686 extern __inline __mmask8
9687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9688 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9690 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9691 (__v4si) __Y, 4,
9692 (__mmask8) -1);
9695 extern __inline __mmask8
9696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9697 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9699 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9700 (__v4si) __Y, 1,
9701 (__mmask8) __M);
9704 extern __inline __mmask8
9705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9706 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
9708 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9709 (__v4si) __Y, 1,
9710 (__mmask8) -1);
9713 extern __inline __mmask8
9714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9717 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9718 (__v4si) __Y, 5,
9719 (__mmask8) __M);
9722 extern __inline __mmask8
9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
9726 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9727 (__v4si) __Y, 5,
9728 (__mmask8) -1);
9731 extern __inline __mmask8
9732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9735 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9736 (__v4si) __Y, 2,
9737 (__mmask8) __M);
9740 extern __inline __mmask8
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
9744 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9745 (__v4si) __Y, 2,
9746 (__mmask8) -1);
9749 extern __inline __mmask8
9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9753 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9754 (__v2di) __Y, 4,
9755 (__mmask8) __M);
9758 extern __inline __mmask8
9759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
9762 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9763 (__v2di) __Y, 4,
9764 (__mmask8) -1);
9767 extern __inline __mmask8
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9771 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9772 (__v2di) __Y, 1,
9773 (__mmask8) __M);
9776 extern __inline __mmask8
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
9780 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9781 (__v2di) __Y, 1,
9782 (__mmask8) -1);
9785 extern __inline __mmask8
9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9789 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9790 (__v2di) __Y, 5,
9791 (__mmask8) __M);
9794 extern __inline __mmask8
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
9798 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9799 (__v2di) __Y, 5,
9800 (__mmask8) -1);
9803 extern __inline __mmask8
9804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9807 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9808 (__v2di) __Y, 2,
9809 (__mmask8) __M);
9812 extern __inline __mmask8
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
9816 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9817 (__v2di) __Y, 2,
9818 (__mmask8) -1);
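/* The remaining intrinsics take immediate operands that must be integer
   constants, so their inline definitions are only emitted when
   optimizing; without optimization the header is expected to provide
   equivalent #define macros in the matching #else branch of this
   conditional.  */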
9821 #ifdef __OPTIMIZE__
9822 extern __inline __m256i
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 _mm256_permutex_epi64 (__m256i __X, const int __I)
9826 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9827 __I,
9828 (__v4di)
9829 _mm256_setzero_si256 (),
9830 (__mmask8) -1);
9833 extern __inline __m256i
9834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9835 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9836 __m256i __X, const int __I)
9838 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9839 __I,
9840 (__v4di) __W,
9841 (__mmask8) __M);
9844 extern __inline __m256i
9845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9846 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9848 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9849 __I,
9850 (__v4di)
9851 _mm256_setzero_si256 (),
9852 (__mmask8) __M);
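/* For _mm256_permutex_epi64, each 2-bit field of __I selects the source
   qword copied to the corresponding destination qword; the mask and
   maskz forms then merge with __W or with zero under __M.  An
   illustrative (non-normative) use:

     __m256i rev = _mm256_permutex_epi64 (x, 0x1B);

   which reverses the four 64-bit elements of a caller value x.  */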
9855 extern __inline __m256d
9856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9857 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9858 __m256d __B, const int __imm)
9860 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9861 (__v4df) __B, __imm,
9862 (__v4df) __W,
9863 (__mmask8) __U);
9866 extern __inline __m256d
9867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9869 const int __imm)
9871 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9872 (__v4df) __B, __imm,
9873 (__v4df)
9874 _mm256_setzero_pd (),
9875 (__mmask8) __U);
9878 extern __inline __m128d
9879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9880 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9881 __m128d __B, const int __imm)
9883 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9884 (__v2df) __B, __imm,
9885 (__v2df) __W,
9886 (__mmask8) __U);
9889 extern __inline __m128d
9890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9892 const int __imm)
9894 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9895 (__v2df) __B, __imm,
9896 (__v2df)
9897 _mm_setzero_pd (),
9898 (__mmask8) __U);
9901 extern __inline __m256
9902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9904 __m256 __B, const int __imm)
9906 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9907 (__v8sf) __B, __imm,
9908 (__v8sf) __W,
9909 (__mmask8) __U);
9912 extern __inline __m256
9913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9914 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9915 const int __imm)
9917 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9918 (__v8sf) __B, __imm,
9919 (__v8sf)
9920 _mm256_setzero_ps (),
9921 (__mmask8) __U);
9924 extern __inline __m128
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9927 const int __imm)
9929 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9930 (__v4sf) __B, __imm,
9931 (__v4sf) __W,
9932 (__mmask8) __U);
9935 extern __inline __m128
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9938 const int __imm)
9940 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9941 (__v4sf) __B, __imm,
9942 (__v4sf)
9943 _mm_setzero_ps (),
9944 (__mmask8) __U);
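/* The masked shuffle_pd/shuffle_ps variants use the same immediate
   encoding as the plain AVX shuffles; only the final merge differs,
   taking unselected elements from __W (mask forms) or forcing them to
   zero (maskz forms).  */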
9947 extern __inline __m256i
9948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9951 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9952 (__v4si) __B,
9953 __imm,
9954 (__v8si)
9955 _mm256_setzero_si256 (),
9956 (__mmask8) -1);
9959 extern __inline __m256i
9960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9961 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9962 __m128i __B, const int __imm)
9964 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9965 (__v4si) __B,
9966 __imm,
9967 (__v8si) __W,
9968 (__mmask8)
9969 __U);
9972 extern __inline __m256i
9973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9974 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9975 const int __imm)
9977 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9978 (__v4si) __B,
9979 __imm,
9980 (__v8si)
9981 _mm256_setzero_si256 (),
9982 (__mmask8)
9983 __U);
9986 extern __inline __m256
9987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9988 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9990 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9991 (__v4sf) __B,
9992 __imm,
9993 (__v8sf)
9994 _mm256_setzero_ps (),
9995 (__mmask8) -1);
9998 extern __inline __m256
9999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10001 __m128 __B, const int __imm)
10003 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10004 (__v4sf) __B,
10005 __imm,
10006 (__v8sf) __W,
10007 (__mmask8) __U);
10010 extern __inline __m256
10011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10012 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10013 const int __imm)
10015 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10016 (__v4sf) __B,
10017 __imm,
10018 (__v8sf)
10019 _mm256_setzero_ps (),
10020 (__mmask8) __U);
10023 extern __inline __m128i
10024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10025 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
10027 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10028 __imm,
10029 (__v4si)
10030 _mm_setzero_si128 (),
10031 (__mmask8) -1);
10034 extern __inline __m128i
10035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10036 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10037 const int __imm)
10039 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10040 __imm,
10041 (__v4si) __W,
10042 (__mmask8)
10043 __U);
10046 extern __inline __m128i
10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10049 const int __imm)
10051 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10052 __imm,
10053 (__v4si)
10054 _mm_setzero_si128 (),
10055 (__mmask8)
10056 __U);
10059 extern __inline __m128
10060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10061 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
10063 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10064 __imm,
10065 (__v4sf)
10066 _mm_setzero_ps (),
10067 (__mmask8) -1);
10070 extern __inline __m128
10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10073 const int __imm)
10075 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10076 __imm,
10077 (__v4sf) __W,
10078 (__mmask8)
10079 __U);
10082 extern __inline __m128
10083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10085 const int __imm)
10087 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10088 __imm,
10089 (__v4sf)
10090 _mm_setzero_ps (),
10091 (__mmask8)
10092 __U);
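/* _mm256_inserti32x4/_mm256_insertf32x4 replace one 128-bit lane of __A
   with __B, and the extract counterparts return one 128-bit lane of __A;
   in these 256-bit forms only the low bit of __imm matters (lane 0 or
   lane 1).  An illustrative (non-normative) use, with v a caller value:

     __m128 hi = _mm256_extractf32x4_ps (v, 1);
     __m256 w  = _mm256_insertf32x4 (v, hi, 0);

   which copies the upper lane of v into the lower lane of w.  */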
10095 extern __inline __m256i
10096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10097 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10099 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10100 (__v4di) __B,
10101 __imm,
10102 (__v4di)
10103 _mm256_setzero_si256 (),
10104 (__mmask8) -1);
10107 extern __inline __m256i
10108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10110 __m256i __B, const int __imm)
10112 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10113 (__v4di) __B,
10114 __imm,
10115 (__v4di) __W,
10116 (__mmask8) __U);
10119 extern __inline __m256i
10120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10122 const int __imm)
10124 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10125 (__v4di) __B,
10126 __imm,
10127 (__v4di)
10128 _mm256_setzero_si256 (),
10129 (__mmask8) __U);
10132 extern __inline __m256i
10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
10136 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10137 (__v8si) __B,
10138 __imm,
10139 (__v8si)
10140 _mm256_setzero_si256 (),
10141 (__mmask8) -1);
10144 extern __inline __m256i
10145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10146 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10147 __m256i __B, const int __imm)
10149 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10150 (__v8si) __B,
10151 __imm,
10152 (__v8si) __W,
10153 (__mmask8) __U);
10156 extern __inline __m256i
10157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10158 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10159 const int __imm)
10161 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10162 (__v8si) __B,
10163 __imm,
10164 (__v8si)
10165 _mm256_setzero_si256 (),
10166 (__mmask8) __U);
10169 extern __inline __m256d
10170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
10173 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10174 (__v4df) __B,
10175 __imm,
10176 (__v4df)
10177 _mm256_setzero_pd (),
10178 (__mmask8) -1);
10181 extern __inline __m256d
10182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10184 __m256d __B, const int __imm)
10186 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10187 (__v4df) __B,
10188 __imm,
10189 (__v4df) __W,
10190 (__mmask8) __U);
10193 extern __inline __m256d
10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10196 const int __imm)
10198 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10199 (__v4df) __B,
10200 __imm,
10201 (__v4df)
10202 _mm256_setzero_pd (),
10203 (__mmask8) __U);
10206 extern __inline __m256
10207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10208 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
10210 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10211 (__v8sf) __B,
10212 __imm,
10213 (__v8sf)
10214 _mm256_setzero_ps (),
10215 (__mmask8) -1);
10218 extern __inline __m256
10219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10220 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10221 __m256 __B, const int __imm)
10223 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10224 (__v8sf) __B,
10225 __imm,
10226 (__v8sf) __W,
10227 (__mmask8) __U);
10230 extern __inline __m256
10231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10232 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10233 const int __imm)
10235 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10236 (__v8sf) __B,
10237 __imm,
10238 (__v8sf)
10239 _mm256_setzero_ps (),
10240 (__mmask8) __U);
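/* shuffle_i64x2/i32x4 and shuffle_f64x2/f32x4 move whole 128-bit lanes:
   in these 256-bit forms bit 0 of __imm selects the lane of __A placed
   in the low half of the result and bit 1 the lane of __B placed in the
   high half.  */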
10243 extern __inline __m256d
10244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
10246 const int __imm)
10248 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10249 (__v4df) __B,
10250 (__v4di) __C,
10251 __imm,
10252 (__mmask8) -1);
10255 extern __inline __m256d
10256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10257 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10258 __m256i __C, const int __imm)
10260 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10261 (__v4df) __B,
10262 (__v4di) __C,
10263 __imm,
10264 (__mmask8) __U);
10267 extern __inline __m256d
10268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10269 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10270 __m256i __C, const int __imm)
10272 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
10273 (__v4df) __B,
10274 (__v4di) __C,
10275 __imm,
10276 (__mmask8) __U);
10279 extern __inline __m256
10280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10281 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
10282 const int __imm)
10284 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10285 (__v8sf) __B,
10286 (__v8si) __C,
10287 __imm,
10288 (__mmask8) -1);
10291 extern __inline __m256
10292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10293 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10294 __m256i __C, const int __imm)
10296 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10297 (__v8sf) __B,
10298 (__v8si) __C,
10299 __imm,
10300 (__mmask8) __U);
10303 extern __inline __m256
10304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10305 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10306 __m256i __C, const int __imm)
10308 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
10309 (__v8sf) __B,
10310 (__v8si) __C,
10311 __imm,
10312 (__mmask8) __U);
10315 extern __inline __m128d
10316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
10318 const int __imm)
10320 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10321 (__v2df) __B,
10322 (__v2di) __C,
10323 __imm,
10324 (__mmask8) -1);
10327 extern __inline __m128d
10328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10330 __m128i __C, const int __imm)
10332 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10333 (__v2df) __B,
10334 (__v2di) __C,
10335 __imm,
10336 (__mmask8) __U);
10339 extern __inline __m128d
10340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10341 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10342 __m128i __C, const int __imm)
10344 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
10345 (__v2df) __B,
10346 (__v2di) __C,
10347 __imm,
10348 (__mmask8) __U);
10351 extern __inline __m128
10352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10353 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
10355 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10356 (__v4sf) __B,
10357 (__v4si) __C,
10358 __imm,
10359 (__mmask8) -1);
10362 extern __inline __m128
10363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10364 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10365 __m128i __C, const int __imm)
10367 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10368 (__v4sf) __B,
10369 (__v4si) __C,
10370 __imm,
10371 (__mmask8) __U);
10374 extern __inline __m128
10375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10376 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10377 __m128i __C, const int __imm)
10379 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
10380 (__v4sf) __B,
10381 (__v4si) __C,
10382 __imm,
10383 (__mmask8) __U);
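/* Roughly speaking, the fixupimm intrinsics classify each element of
   __B into one of eight token types, look up the matching 4-bit response
   code in the corresponding element of the integer table __C, and
   produce the value that code describes; __imm mainly controls which
   invalid-operation exceptions are reported.  Masked-off elements keep
   __A (_mask forms) or become zero (_maskz forms).  */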
10386 extern __inline __m256i
10387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10388 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10389 const int __imm)
10391 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10392 (__v8si) __W,
10393 (__mmask8) __U);
10396 extern __inline __m256i
10397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
10400 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10401 (__v8si)
10402 _mm256_setzero_si256 (),
10403 (__mmask8) __U);
10406 extern __inline __m128i
10407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10408 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10409 const int __imm)
10411 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10412 (__v4si) __W,
10413 (__mmask8) __U);
10416 extern __inline __m128i
10417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10418 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
10420 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10421 (__v4si)
10422 _mm_setzero_si128 (),
10423 (__mmask8) __U);
10426 extern __inline __m256i
10427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10428 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10429 const int __imm)
10431 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10432 (__v4di) __W,
10433 (__mmask8) __U);
10436 extern __inline __m256i
10437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10438 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
10440 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10441 (__v4di)
10442 _mm256_setzero_si256 (),
10443 (__mmask8) __U);
10446 extern __inline __m128i
10447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10449 const int __imm)
10451 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10452 (__v2di) __W,
10453 (__mmask8) __U);
10456 extern __inline __m128i
10457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10458 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
10460 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10461 (__v2di)
10462 _mm_setzero_si128 (),
10463 (__mmask8) __U);
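/* The srli variants shift each element right logically by the immediate
   count; a count at or above the element width produces zero.  As
   elsewhere, unselected elements come from __W or are zeroed.  */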
10466 extern __inline __m256i
10467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10468 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10469 const int __imm)
10471 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10472 (__v4di) __B,
10473 (__v4di) __C, __imm,
10474 (__mmask8) -1);
10477 extern __inline __m256i
10478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10480 __m256i __B, __m256i __C,
10481 const int __imm)
10483 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10484 (__v4di) __B,
10485 (__v4di) __C, __imm,
10486 (__mmask8) __U);
10489 extern __inline __m256i
10490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10492 __m256i __B, __m256i __C,
10493 const int __imm)
10495 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10496 (__v4di) __B,
10497 (__v4di) __C,
10498 __imm,
10499 (__mmask8) __U);
10502 extern __inline __m256i
10503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10504 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10505 const int __imm)
10507 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10508 (__v8si) __B,
10509 (__v8si) __C, __imm,
10510 (__mmask8) -1);
10513 extern __inline __m256i
10514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10515 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10516 __m256i __B, __m256i __C,
10517 const int __imm)
10519 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10520 (__v8si) __B,
10521 (__v8si) __C, __imm,
10522 (__mmask8) __U);
10525 extern __inline __m256i
10526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10527 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10528 __m256i __B, __m256i __C,
10529 const int __imm)
10531 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10532 (__v8si) __B,
10533 (__v8si) __C,
10534 __imm,
10535 (__mmask8) __U);
10538 extern __inline __m128i
10539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10541 const int __imm)
10543 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10544 (__v2di) __B,
10545 (__v2di) __C, __imm,
10546 (__mmask8) -1);
10549 extern __inline __m128i
10550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10551 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10552 __m128i __B, __m128i __C, const int __imm)
10554 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10555 (__v2di) __B,
10556 (__v2di) __C, __imm,
10557 (__mmask8) __U);
10560 extern __inline __m128i
10561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10562 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10563 __m128i __B, __m128i __C, const int __imm)
10565 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10566 (__v2di) __B,
10567 (__v2di) __C,
10568 __imm,
10569 (__mmask8) __U);
10572 extern __inline __m128i
10573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10574 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10575 const int __imm)
10577 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10578 (__v4si) __B,
10579 (__v4si) __C, __imm,
10580 (__mmask8) -1);
10583 extern __inline __m128i
10584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10585 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10586 __m128i __B, __m128i __C, const int __imm)
10588 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10589 (__v4si) __B,
10590 (__v4si) __C, __imm,
10591 (__mmask8) __U);
10594 extern __inline __m128i
10595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10596 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10597 __m128i __B, __m128i __C, const int __imm)
10599 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10600 (__v4si) __B,
10601 (__v4si) __C,
10602 __imm,
10603 (__mmask8) __U);
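/* For the ternarylogic intrinsics __imm is a complete truth table: bit
   ((a << 2) | (b << 1) | c) of __imm is the result bit produced for
   input bits a, b, c taken from __A, __B and __C.  Illustrative
   (non-normative) examples, with a, b, c caller values:

     __m256i x3  = _mm256_ternarylogic_epi32 (a, b, c, 0x96);
     __m256i maj = _mm256_ternarylogic_epi32 (a, b, c, 0xE8);

   0x96 computes the three-way XOR and 0xE8 the bitwise majority.  */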
10606 extern __inline __m256
10607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10608 _mm256_roundscale_ps (__m256 __A, const int __imm)
10610 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10611 __imm,
10612 (__v8sf)
10613 _mm256_setzero_ps (),
10614 (__mmask8) -1);
10617 extern __inline __m256
10618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10619 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10620 const int __imm)
10622 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10623 __imm,
10624 (__v8sf) __W,
10625 (__mmask8) __U);
10628 extern __inline __m256
10629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10630 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
10632 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10633 __imm,
10634 (__v8sf)
10635 _mm256_setzero_ps (),
10636 (__mmask8) __U);
10639 extern __inline __m256d
10640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641 _mm256_roundscale_pd (__m256d __A, const int __imm)
10643 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10644 __imm,
10645 (__v4df)
10646 _mm256_setzero_pd (),
10647 (__mmask8) -1);
10650 extern __inline __m256d
10651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10652 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10653 const int __imm)
10655 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10656 __imm,
10657 (__v4df) __W,
10658 (__mmask8) __U);
10661 extern __inline __m256d
10662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10663 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10665 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10666 __imm,
10667 (__v4df)
10668 _mm256_setzero_pd (),
10669 (__mmask8) __U);
10672 extern __inline __m128
10673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10674 _mm_roundscale_ps (__m128 __A, const int __imm)
10676 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10677 __imm,
10678 (__v4sf)
10679 _mm_setzero_ps (),
10680 (__mmask8) -1);
10683 extern __inline __m128
10684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10685 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10686 const int __imm)
10688 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10689 __imm,
10690 (__v4sf) __W,
10691 (__mmask8) __U);
10694 extern __inline __m128
10695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10696 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10698 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10699 __imm,
10700 (__v4sf)
10701 _mm_setzero_ps (),
10702 (__mmask8) __U);
10705 extern __inline __m128d
10706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10707 _mm_roundscale_pd (__m128d __A, const int __imm)
10709 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10710 __imm,
10711 (__v2df)
10712 _mm_setzero_pd (),
10713 (__mmask8) -1);
10716 extern __inline __m128d
10717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10718 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10719 const int __imm)
10721 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10722 __imm,
10723 (__v2df) __W,
10724 (__mmask8) __U);
10727 extern __inline __m128d
10728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10731 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10732 __imm,
10733 (__v2df)
10734 _mm_setzero_pd (),
10735 (__mmask8) __U);
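/* roundscale rounds each element to a multiple of 2^-M, where M is taken
   from bits 7:4 of the immediate and the low bits select the rounding
   mode (or defer to MXCSR).  For instance an immediate of 0x01 (M = 0,
   round toward negative infinity) gives a per-element floor.  */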
10738 extern __inline __m256
10739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10740 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10741 _MM_MANTISSA_SIGN_ENUM __C)
10743 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10744 (__C << 2) | __B,
10745 (__v8sf)
10746 _mm256_setzero_ps (),
10747 (__mmask8) -1);
10750 extern __inline __m256
10751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10752 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10753 _MM_MANTISSA_NORM_ENUM __B,
10754 _MM_MANTISSA_SIGN_ENUM __C)
10756 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10757 (__C << 2) | __B,
10758 (__v8sf) __W,
10759 (__mmask8) __U);
10762 extern __inline __m256
10763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10764 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10765 _MM_MANTISSA_NORM_ENUM __B,
10766 _MM_MANTISSA_SIGN_ENUM __C)
10768 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10769 (__C << 2) | __B,
10770 (__v8sf)
10771 _mm256_setzero_ps (),
10772 (__mmask8) __U);
10775 extern __inline __m128
10776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10778 _MM_MANTISSA_SIGN_ENUM __C)
10780 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10781 (__C << 2) | __B,
10782 (__v4sf)
10783 _mm_setzero_ps (),
10784 (__mmask8) -1);
10787 extern __inline __m128
10788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10789 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10790 _MM_MANTISSA_NORM_ENUM __B,
10791 _MM_MANTISSA_SIGN_ENUM __C)
10793 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10794 (__C << 2) | __B,
10795 (__v4sf) __W,
10796 (__mmask8) __U);
10799 extern __inline __m128
10800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10801 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10802 _MM_MANTISSA_NORM_ENUM __B,
10803 _MM_MANTISSA_SIGN_ENUM __C)
10805 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10806 (__C << 2) | __B,
10807 (__v4sf)
10808 _mm_setzero_ps (),
10809 (__mmask8) __U);
10812 extern __inline __m256d
10813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10814 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10815 _MM_MANTISSA_SIGN_ENUM __C)
10817 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10818 (__C << 2) | __B,
10819 (__v4df)
10820 _mm256_setzero_pd (),
10821 (__mmask8) -1);
10824 extern __inline __m256d
10825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10826 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10827 _MM_MANTISSA_NORM_ENUM __B,
10828 _MM_MANTISSA_SIGN_ENUM __C)
10830 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10831 (__C << 2) | __B,
10832 (__v4df) __W,
10833 (__mmask8) __U);
10836 extern __inline __m256d
10837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10838 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10839 _MM_MANTISSA_NORM_ENUM __B,
10840 _MM_MANTISSA_SIGN_ENUM __C)
10842 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10843 (__C << 2) | __B,
10844 (__v4df)
10845 _mm256_setzero_pd (),
10846 (__mmask8) __U);
10849 extern __inline __m128d
10850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10851 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10852 _MM_MANTISSA_SIGN_ENUM __C)
10854 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10855 (__C << 2) | __B,
10856 (__v2df)
10857 _mm_setzero_pd (),
10858 (__mmask8) -1);
10861 extern __inline __m128d
10862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10864 _MM_MANTISSA_NORM_ENUM __B,
10865 _MM_MANTISSA_SIGN_ENUM __C)
10867 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10868 (__C << 2) | __B,
10869 (__v2df) __W,
10870 (__mmask8) __U);
10873 extern __inline __m128d
10874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10875 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10876 _MM_MANTISSA_NORM_ENUM __B,
10877 _MM_MANTISSA_SIGN_ENUM __C)
10879 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10880 (__C << 2) | __B,
10881 (__v2df)
10882 _mm_setzero_pd (),
10883 (__mmask8) __U);
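/* The getmant builtins fold both enum arguments into one immediate,
   (__C << 2) | __B: the low two bits give the normalization interval
   (_MM_MANT_NORM_*) and the next two bits the sign treatment
   (_MM_MANT_SIGN_*).  */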
10886 extern __inline __m256
10887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10889 __m256i __index, void const *__addr,
10890 int __scale)
10892 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10893 __addr,
10894 (__v8si) __index,
10895 __mask, __scale);
10898 extern __inline __m128
10899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10901 __m128i __index, void const *__addr,
10902 int __scale)
10904 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10905 __addr,
10906 (__v4si) __index,
10907 __mask, __scale);
10910 extern __inline __m256d
10911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10912 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10913 __m128i __index, void const *__addr,
10914 int __scale)
10916 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10917 __addr,
10918 (__v4si) __index,
10919 __mask, __scale);
10922 extern __inline __m128d
10923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10924 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10925 __m128i __index, void const *__addr,
10926 int __scale)
10928 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10929 __addr,
10930 (__v4si) __index,
10931 __mask, __scale);
10934 extern __inline __m128
10935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10936 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10937 __m256i __index, void const *__addr,
10938 int __scale)
10940 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10941 __addr,
10942 (__v4di) __index,
10943 __mask, __scale);
10946 extern __inline __m128
10947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10948 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10949 __m128i __index, void const *__addr,
10950 int __scale)
10952 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10953 __addr,
10954 (__v2di) __index,
10955 __mask, __scale);
10958 extern __inline __m256d
10959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10960 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10961 __m256i __index, void const *__addr,
10962 int __scale)
10964 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10965 __addr,
10966 (__v4di) __index,
10967 __mask, __scale);
10970 extern __inline __m128d
10971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10973 __m128i __index, void const *__addr,
10974 int __scale)
10976 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10977 __addr,
10978 (__v2di) __index,
10979 __mask, __scale);
10982 extern __inline __m256i
10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10985 __m256i __index, void const *__addr,
10986 int __scale)
10988 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10989 __addr,
10990 (__v8si) __index,
10991 __mask, __scale);
10994 extern __inline __m128i
10995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10996 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10997 __m128i __index, void const *__addr,
10998 int __scale)
11000 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
11001 __addr,
11002 (__v4si) __index,
11003 __mask, __scale);
11006 extern __inline __m256i
11007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11008 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11009 __m128i __index, void const *__addr,
11010 int __scale)
11012 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11013 __addr,
11014 (__v4si) __index,
11015 __mask, __scale);
11018 extern __inline __m128i
11019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11020 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11021 __m128i __index, void const *__addr,
11022 int __scale)
11024 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11025 __addr,
11026 (__v4si) __index,
11027 __mask, __scale);
11030 extern __inline __m128i
11031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11032 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11033 __m256i __index, void const *__addr,
11034 int __scale)
11036 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11037 __addr,
11038 (__v4di) __index,
11039 __mask, __scale);
11042 extern __inline __m128i
11043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11044 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11045 __m128i __index, void const *__addr,
11046 int __scale)
11048 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11049 __addr,
11050 (__v2di) __index,
11051 __mask, __scale);
11054 extern __inline __m256i
11055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11056 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11057 __m256i __index, void const *__addr,
11058 int __scale)
11060 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11061 __addr,
11062 (__v4di) __index,
11063 __mask, __scale);
11066 extern __inline __m128i
11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11069 __m128i __index, void const *__addr,
11070 int __scale)
11072 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11073 __addr,
11074 (__v2di) __index,
11075 __mask, __scale);
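/* The mmask gather intrinsics load only the elements whose bit in __mask
   is set; the other result elements are taken unchanged from __v1_old.
   __scale must be 1, 2, 4 or 8 and multiplies each index before it is
   added to __addr.  Illustrative (non-normative) use, with def, m, idx
   and table caller values and a 4-byte element stride:

     __m256 g = _mm256_mmask_i32gather_ps (def, m, idx, table, 4);  */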
11078 extern __inline void
11079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11080 _mm256_i32scatter_ps (void *__addr, __m256i __index,
11081 __m256 __v1, const int __scale)
11083 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11084 (__v8si) __index, (__v8sf) __v1,
11085 __scale);
11088 extern __inline void
11089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11090 _mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11091 __m256i __index, __m256 __v1,
11092 const int __scale)
11094 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11095 (__v8sf) __v1, __scale);
11098 extern __inline void
11099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11100 _mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11101 const int __scale)
11103 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11104 (__v4si) __index, (__v4sf) __v1,
11105 __scale);
11108 extern __inline void
11109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11110 _mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11111 __m128i __index, __m128 __v1,
11112 const int __scale)
11114 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11115 (__v4sf) __v1, __scale);
11118 extern __inline void
11119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11120 _mm256_i32scatter_pd (void *__addr, __m128i __index,
11121 __m256d __v1, const int __scale)
11123 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11124 (__v4si) __index, (__v4df) __v1,
11125 __scale);
11128 extern __inline void
11129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11130 _mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11131 __m128i __index, __m256d __v1,
11132 const int __scale)
11134 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11135 (__v4df) __v1, __scale);
11138 extern __inline void
11139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11140 _mm_i32scatter_pd (void *__addr, __m128i __index,
11141 __m128d __v1, const int __scale)
11143 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11144 (__v4si) __index, (__v2df) __v1,
11145 __scale);
11148 extern __inline void
11149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11150 _mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11151 __m128i __index, __m128d __v1,
11152 const int __scale)
11154 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11155 (__v2df) __v1, __scale);
11158 extern __inline void
11159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11160 _mm256_i64scatter_ps (void *__addr, __m256i __index,
11161 __m128 __v1, const int __scale)
11163 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11164 (__v4di) __index, (__v4sf) __v1,
11165 __scale);
11168 extern __inline void
11169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170 _mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11171 __m256i __index, __m128 __v1,
11172 const int __scale)
11174 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11175 (__v4sf) __v1, __scale);
11178 extern __inline void
11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180 _mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11181 const int __scale)
11183 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11184 (__v2di) __index, (__v4sf) __v1,
11185 __scale);
11188 extern __inline void
11189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11190 _mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11191 __m128i __index, __m128 __v1,
11192 const int __scale)
11194 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11195 (__v4sf) __v1, __scale);
11198 extern __inline void
11199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11200 _mm256_i64scatter_pd (void *__addr, __m256i __index,
11201 __m256d __v1, const int __scale)
11203 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11204 (__v4di) __index, (__v4df) __v1,
11205 __scale);
11208 extern __inline void
11209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11210 _mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11211 __m256i __index, __m256d __v1,
11212 const int __scale)
11214 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11215 (__v4df) __v1, __scale);
11218 extern __inline void
11219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11220 _mm_i64scatter_pd (void *__addr, __m128i __index,
11221 __m128d __v1, const int __scale)
11223 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11224 (__v2di) __index, (__v2df) __v1,
11225 __scale);
11228 extern __inline void
11229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11230 _mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11231 __m128i __index, __m128d __v1,
11232 const int __scale)
11234 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11235 (__v2df) __v1, __scale);
11238 extern __inline void
11239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11240 _mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11241 __m256i __v1, const int __scale)
11243 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11244 (__v8si) __index, (__v8si) __v1,
11245 __scale);
11248 extern __inline void
11249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11250 _mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11251 __m256i __index, __m256i __v1,
11252 const int __scale)
11254 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11255 (__v8si) __v1, __scale);
11258 extern __inline void
11259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11260 _mm_i32scatter_epi32 (void *__addr, __m128i __index,
11261 __m128i __v1, const int __scale)
11263 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11264 (__v4si) __index, (__v4si) __v1,
11265 __scale);
11268 extern __inline void
11269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11270 _mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11271 __m128i __index, __m128i __v1,
11272 const int __scale)
11274 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11275 (__v4si) __v1, __scale);
11278 extern __inline void
11279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280 _mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11281 __m256i __v1, const int __scale)
11283 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11284 (__v4si) __index, (__v4di) __v1,
11285 __scale);
11288 extern __inline void
11289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290 _mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11291 __m128i __index, __m256i __v1,
11292 const int __scale)
11294 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11295 (__v4di) __v1, __scale);
11298 extern __inline void
11299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11300 _mm_i32scatter_epi64 (void *__addr, __m128i __index,
11301 __m128i __v1, const int __scale)
11303 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11304 (__v4si) __index, (__v2di) __v1,
11305 __scale);
11308 extern __inline void
11309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11310 _mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11311 __m128i __index, __m128i __v1,
11312 const int __scale)
11314 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11315 (__v2di) __v1, __scale);
11318 extern __inline void
11319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11320 _mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11321 __m128i __v1, const int __scale)
11323 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11324 (__v4di) __index, (__v4si) __v1,
11325 __scale);
11328 extern __inline void
11329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11330 _mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11331 __m256i __index, __m128i __v1,
11332 const int __scale)
11334 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11335 (__v4si) __v1, __scale);
11338 extern __inline void
11339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11340 _mm_i64scatter_epi32 (void *__addr, __m128i __index,
11341 __m128i __v1, const int __scale)
11343 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11344 (__v2di) __index, (__v4si) __v1,
11345 __scale);
11348 extern __inline void
11349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11350 _mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11351 __m128i __index, __m128i __v1,
11352 const int __scale)
11354 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11355 (__v4si) __v1, __scale);
11358 extern __inline void
11359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11360 _mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11361 __m256i __v1, const int __scale)
11363 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11364 (__v4di) __index, (__v4di) __v1,
11365 __scale);
11368 extern __inline void
11369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11370 _mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11371 __m256i __index, __m256i __v1,
11372 const int __scale)
11374 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11375 (__v4di) __v1, __scale);
11378 extern __inline void
11379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380 _mm_i64scatter_epi64 (void *__addr, __m128i __index,
11381 __m128i __v1, const int __scale)
11383 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11384 (__v2di) __index, (__v2di) __v1,
11385 __scale);
11388 extern __inline void
11389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390 _mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11391 __m128i __index, __m128i __v1,
11392 const int __scale)
11394 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11395 (__v2di) __v1, __scale);
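/* The scatter intrinsics are the store-side counterpart of the gathers
   above: each selected element of __v1 is written to
   __addr + __index[i] * __scale.  The unmasked forms pass a constant
   0xFF mask and therefore store every element.  */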
11398 extern __inline __m256i
11399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11401 _MM_PERM_ENUM __mask)
11403 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11404 (__v8si) __W,
11405 (__mmask8) __U);
11408 extern __inline __m256i
11409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11410 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11411 _MM_PERM_ENUM __mask)
11413 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11414 (__v8si)
11415 _mm256_setzero_si256 (),
11416 (__mmask8) __U);
11419 extern __inline __m128i
11420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11422 _MM_PERM_ENUM __mask)
11424 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11425 (__v4si) __W,
11426 (__mmask8) __U);
11429 extern __inline __m128i
11430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11431 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11432 _MM_PERM_ENUM __mask)
11434 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11435 (__v4si)
11436 _mm_setzero_si128 (),
11437 (__mmask8) __U);
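/* The masked shuffle_epi32 variants take the control as an
   _MM_PERM_ENUM; each 2-bit field selects the source dword for one
   destination dword within each 128-bit lane, exactly as for the
   unmasked _mm256_shuffle_epi32/_mm_shuffle_epi32.  */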
11440 extern __inline __m256i
11441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11442 _mm256_rol_epi32 (__m256i __A, const int __B)
11444 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11445 (__v8si)
11446 _mm256_setzero_si256 (),
11447 (__mmask8) -1);
11450 extern __inline __m256i
11451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11453 const int __B)
11455 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11456 (__v8si) __W,
11457 (__mmask8) __U);
11460 extern __inline __m256i
11461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11462 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
11464 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11465 (__v8si)
11466 _mm256_setzero_si256 (),
11467 (__mmask8) __U);
11470 extern __inline __m128i
11471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472 _mm_rol_epi32 (__m128i __A, const int __B)
11474 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11475 (__v4si)
11476 _mm_setzero_si128 (),
11477 (__mmask8) -1);
11480 extern __inline __m128i
11481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11482 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11483 const int __B)
11485 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11486 (__v4si) __W,
11487 (__mmask8) __U);
11490 extern __inline __m128i
11491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11492 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
11494 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11495 (__v4si)
11496 _mm_setzero_si128 (),
11497 (__mmask8) __U);
11500 extern __inline __m256i
11501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11502 _mm256_ror_epi32 (__m256i __A, const int __B)
11504 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11505 (__v8si)
11506 _mm256_setzero_si256 (),
11507 (__mmask8) -1);
11510 extern __inline __m256i
11511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11512 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11513 const int __B)
11515 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11516 (__v8si) __W,
11517 (__mmask8) __U);
11520 extern __inline __m256i
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
11524 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11525 (__v8si)
11526 _mm256_setzero_si256 (),
11527 (__mmask8) __U);
11530 extern __inline __m128i
11531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532 _mm_ror_epi32 (__m128i __A, const int __B)
11534 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11535 (__v4si)
11536 _mm_setzero_si128 (),
11537 (__mmask8) -1);
11540 extern __inline __m128i
11541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11542 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11543 const int __B)
11545 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11546 (__v4si) __W,
11547 (__mmask8) __U);
11550 extern __inline __m128i
11551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
11554 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11555 (__v4si)
11556 _mm_setzero_si128 (),
11557 (__mmask8) __U);
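/* The rol/ror intrinsics rotate each element by the immediate count;
   the count is interpreted modulo the element width (32 here, 64 for
   the epi64 forms below), so no bits are ever lost.  */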
11560 extern __inline __m256i
11561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562 _mm256_rol_epi64 (__m256i __A, const int __B)
11564 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11565 (__v4di)
11566 _mm256_setzero_si256 (),
11567 (__mmask8) -1);
11570 extern __inline __m256i
11571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11572 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11573 const int __B)
11575 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11576 (__v4di) __W,
11577 (__mmask8) __U);
11580 extern __inline __m256i
11581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11582 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11584 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11585 (__v4di)
11586 _mm256_setzero_si256 (),
11587 (__mmask8) __U);
11590 extern __inline __m128i
11591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11592 _mm_rol_epi64 (__m128i __A, const int __B)
11594 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11595 (__v2di)
11596 _mm_setzero_si128 (),
11597 (__mmask8) -1);
11600 extern __inline __m128i
11601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11602 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11603 const int __B)
11605 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11606 (__v2di) __W,
11607 (__mmask8) __U);
11610 extern __inline __m128i
11611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
11614 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11615 (__v2di)
11616 _mm_setzero_si128 (),
11617 (__mmask8) __U);
11620 extern __inline __m256i
11621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622 _mm256_ror_epi64 (__m256i __A, const int __B)
11624 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11625 (__v4di)
11626 _mm256_setzero_si256 (),
11627 (__mmask8) -1);
11630 extern __inline __m256i
11631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11633 const int __B)
11635 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11636 (__v4di) __W,
11637 (__mmask8) __U);
11640 extern __inline __m256i
11641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
11644 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11645 (__v4di)
11646 _mm256_setzero_si256 (),
11647 (__mmask8) __U);
11650 extern __inline __m128i
11651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11652 _mm_ror_epi64 (__m128i __A, const int __B)
11654 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11655 (__v2di)
11656 _mm_setzero_si128 (),
11657 (__mmask8) -1);
11660 extern __inline __m128i
11661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11662 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11663 const int __B)
11665 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11666 (__v2di) __W,
11667 (__mmask8) __U);
11670 extern __inline __m128i
11671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11674 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11675 (__v2di)
11676 _mm_setzero_si128 (),
11677 (__mmask8) __U);
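/* Illustrative usage sketch (editorial addition, not part of this header):
   the rol/ror forms rotate each lane by an immediate, and a left rotate by
   N is the same operation as a right rotate by width - N.  The mask_ forms
   merge unselected lanes from the first operand, the maskz_ forms zero
   them.  The hypothetical helpers below assume <immintrin.h> is included
   and the file is compiled with -mavx512vl.  */

static inline __m256i
rotl3_merge (__m256i src, __mmask8 k, __m256i v)
{
  /* Rotate the selected 32-bit lanes of v left by 3; the remaining lanes
     are copied from src.  */
  return _mm256_mask_rol_epi32 (src, k, v, 3);
}

static inline __m256i
rotl3_zero (__mmask8 k, __m256i v)
{
  /* The same rotation written as a right rotate by 29; unselected lanes
     become zero.  */
  return _mm256_maskz_ror_epi32 (k, v, 29);
}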
11680 extern __inline __m128i
11681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11682 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11684 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11685 (__v4si) __B, __imm,
11686 (__v4si)
11687 _mm_setzero_si128 (),
11688 (__mmask8) -1);
11691 extern __inline __m128i
11692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11693 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11694 __m128i __B, const int __imm)
11696 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11697 (__v4si) __B, __imm,
11698 (__v4si) __W,
11699 (__mmask8) __U);
11702 extern __inline __m128i
11703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11704 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11705 const int __imm)
11707 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11708 (__v4si) __B, __imm,
11709 (__v4si)
11710 _mm_setzero_si128 (),
11711 (__mmask8) __U);
11714 extern __inline __m128i
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11718 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11719 (__v2di) __B, __imm,
11720 (__v2di)
11721 _mm_setzero_si128 (),
11722 (__mmask8) -1);
11725 extern __inline __m128i
11726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11728 __m128i __B, const int __imm)
11730 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11731 (__v2di) __B, __imm,
11732 (__v2di) __W,
11733 (__mmask8) __U);
11736 extern __inline __m128i
11737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11738 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11739 const int __imm)
11741 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11742 (__v2di) __B, __imm,
11743 (__v2di)
11744 _mm_setzero_si128 (),
11745 (__mmask8) __U);
11748 extern __inline __m256i
11749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11752 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11753 (__v8si) __B, __imm,
11754 (__v8si)
11755 _mm256_setzero_si256 (),
11756 (__mmask8) -1);
11759 extern __inline __m256i
11760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11761 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11762 __m256i __B, const int __imm)
11764 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11765 (__v8si) __B, __imm,
11766 (__v8si) __W,
11767 (__mmask8) __U);
11770 extern __inline __m256i
11771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11772 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11773 const int __imm)
11775 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11776 (__v8si) __B, __imm,
11777 (__v8si)
11778 _mm256_setzero_si256 (),
11779 (__mmask8) __U);
11782 extern __inline __m256i
11783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11784 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11786 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11787 (__v4di) __B, __imm,
11788 (__v4di)
11789 _mm256_setzero_si256 (),
11790 (__mmask8) -1);
11793 extern __inline __m256i
11794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11795 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11796 __m256i __B, const int __imm)
11798 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11799 (__v4di) __B, __imm,
11800 (__v4di) __W,
11801 (__mmask8) __U);
11804 extern __inline __m256i
11805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11806 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11807 const int __imm)
11809 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11810 (__v4di) __B, __imm,
11811 (__v4di)
11812 _mm256_setzero_si256 (),
11813 (__mmask8) __U);
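/* Illustrative usage sketch (editorial addition, not part of this header):
   the alignr (valign) forms concatenate the two sources, first operand in
   the upper half, shift the pair right by a whole number of lanes and keep
   the lower half, so an immediate of 1 funnels a single element across the
   register boundary.  Hypothetical helper; assumes <immintrin.h> and
   -mavx512vl.  */

static inline __m128i
funnel_by_one_lane (__m128i hi, __m128i lo)
{
  /* Result lanes, low to high: lo[1], lo[2], lo[3], hi[0].  */
  return _mm_alignr_epi32 (hi, lo, 1);
}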
11816 extern __inline __m128i
11817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11818 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11819 const int __I)
11821 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11822 (__v8hi) __W,
11823 (__mmask8) __U);
11826 extern __inline __m128i
11827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11828 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11830 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11831 (__v8hi)
11832 _mm_setzero_si128 (),
11833 (__mmask8) __U);
11836 extern __inline __m128i
11837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11838 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11839 const int __I)
11841 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11842 (__v8hi) __W,
11843 (__mmask8) __U);
11846 extern __inline __m128i
11847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11848 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11850 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11851 (__v8hi)
11852 _mm_setzero_si128 (),
11853 (__mmask8) __U);
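/* Illustrative usage sketch (editorial addition, not part of this header):
   the masked cvtps_ph forms convert packed single precision to half
   precision, producing a 128-bit vector of 16-bit lanes; an immediate of 0
   selects round-to-nearest-even.  Hypothetical helper; assumes
   <immintrin.h> and -mavx512vl.  */

static inline __m128i
floats_to_halves (__mmask8 k, __m256 v)
{
  /* Lanes not selected by k are zeroed in the result.  */
  return _mm256_maskz_cvtps_ph (k, v, 0);
}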
11856 extern __inline __m256i
11857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11859 const int __imm)
11861 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11862 (__v8si) __W,
11863 (__mmask8) __U);
11866 extern __inline __m256i
11867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11870 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11871 (__v8si)
11872 _mm256_setzero_si256 (),
11873 (__mmask8) __U);
11876 extern __inline __m128i
11877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11879 const int __imm)
11881 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11882 (__v4si) __W,
11883 (__mmask8) __U);
11886 extern __inline __m128i
11887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11890 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11891 (__v4si)
11892 _mm_setzero_si128 (),
11893 (__mmask8) __U);
11896 extern __inline __m256i
11897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11898 _mm256_srai_epi64 (__m256i __A, const int __imm)
11900 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11901 (__v4di)
11902 _mm256_setzero_si256 (),
11903 (__mmask8) -1);
11906 extern __inline __m256i
11907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11909 const int __imm)
11911 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11912 (__v4di) __W,
11913 (__mmask8) __U);
11916 extern __inline __m256i
11917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11918 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11920 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11921 (__v4di)
11922 _mm256_setzero_si256 (),
11923 (__mmask8) __U);
11926 extern __inline __m128i
11927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11928 _mm_srai_epi64 (__m128i __A, const int __imm)
11930 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11931 (__v2di)
11932 _mm_setzero_si128 (),
11933 (__mmask8) -1);
11936 extern __inline __m128i
11937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11938 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11939 const int __imm)
11941 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11942 (__v2di) __W,
11943 (__mmask8) __U);
11946 extern __inline __m128i
11947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11950 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11951 (__v2di)
11952 _mm_setzero_si128 (),
11953 (__mmask8) __U);
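/* Illustrative usage sketch (editorial addition, not part of this header):
   unlike AVX2, AVX-512VL provides an arithmetic (sign-propagating) right
   shift for 64-bit lanes; shifting right by k divides by 2**k rounding
   toward negative infinity.  Hypothetical helper; assumes <immintrin.h>
   and -mavx512vl.  */

static inline __m256i
floor_div_by_8 (__m256i v)
{
  /* Per-lane signed division of four 64-bit values by 8.  */
  return _mm256_srai_epi64 (v, 3);
}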
11956 extern __inline __m128i
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11960 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11961 (__v4si) __W,
11962 (__mmask8) __U);
11965 extern __inline __m128i
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11969 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11970 (__v4si)
11971 _mm_setzero_si128 (),
11972 (__mmask8) __U);
11975 extern __inline __m128i
11976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11977 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11979 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11980 (__v2di) __W,
11981 (__mmask8) __U);
11984 extern __inline __m128i
11985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11986 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11988 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11989 (__v2di)
11990 _mm_setzero_si128 (),
11991 (__mmask8) __U);
11994 extern __inline __m256i
11995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11996 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11997 int __B)
11999 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12000 (__v8si) __W,
12001 (__mmask8) __U);
12004 extern __inline __m256i
12005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12006 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
12008 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12009 (__v8si)
12010 _mm256_setzero_si256 (),
12011 (__mmask8) __U);
12014 extern __inline __m256i
12015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12016 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12017 int __B)
12019 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12020 (__v4di) __W,
12021 (__mmask8) __U);
12024 extern __inline __m256i
12025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
12028 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12029 (__v4di)
12030 _mm256_setzero_si256 (),
12031 (__mmask8) __U);
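/* Illustrative usage sketch (editorial addition, not part of this header):
   with merge masking, a shift can update selected lanes in place, e.g.
   multiply just the lanes picked by the mask by 16.  Hypothetical helper;
   assumes <immintrin.h> and -mavx512vl.  */

static inline __m256i
scale_selected_by_16 (__m256i v, __mmask8 k)
{
  /* Passing v as both the merge source and the shift operand leaves the
     unselected 32-bit lanes unchanged.  */
  return _mm256_mask_slli_epi32 (v, k, v, 4);
}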
12034 extern __inline __m256d
12035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12037 const int __imm)
12039 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12040 (__v4df) __W,
12041 (__mmask8) __U);
12044 extern __inline __m256d
12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
12048 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12049 (__v4df)
12050 _mm256_setzero_pd (),
12051 (__mmask8) __U);
12054 extern __inline __m256d
12055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12056 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12057 const int __C)
12059 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12060 (__v4df) __W,
12061 (__mmask8) __U);
12064 extern __inline __m256d
12065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
12068 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12069 (__v4df)
12070 _mm256_setzero_pd (),
12071 (__mmask8) __U);
12074 extern __inline __m128d
12075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12076 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12077 const int __C)
12079 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12080 (__v2df) __W,
12081 (__mmask8) __U);
12084 extern __inline __m128d
12085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12086 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
12088 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12089 (__v2df)
12090 _mm_setzero_pd (),
12091 (__mmask8) __U);
12094 extern __inline __m256
12095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12097 const int __C)
12099 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12100 (__v8sf) __W,
12101 (__mmask8) __U);
12104 extern __inline __m256
12105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
12108 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12109 (__v8sf)
12110 _mm256_setzero_ps (),
12111 (__mmask8) __U);
12114 extern __inline __m128
12115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12117 const int __C)
12119 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12120 (__v4sf) __W,
12121 (__mmask8) __U);
12124 extern __inline __m128
12125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12126 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
12128 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12129 (__v4sf)
12130 _mm_setzero_ps (),
12131 (__mmask8) __U);
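/* Illustrative usage sketch (editorial addition, not part of this header):
   permutex_pd shuffles across the whole 256-bit register with a 2-bit
   index per destination lane, while permute_pd/permute_ps only rearrange
   within each 128-bit half.  0x1b encodes the element order 3,2,1,0.
   Hypothetical helper; assumes <immintrin.h> and -mavx512vl.  */

static inline __m256d
reverse_selected (__m256d src, __mmask8 k, __m256d v)
{
  /* Fully reverse the four doubles of v; lanes not selected by k are
     taken from src instead.  */
  return _mm256_mask_permutex_pd (src, k, v, 0x1b);
}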
12134 extern __inline __m256d
12135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12136 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
12138 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12139 (__v4df) __W,
12140 (__mmask8) __U);
12143 extern __inline __m256
12144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
12147 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12148 (__v8sf) __W,
12149 (__mmask8) __U);
12152 extern __inline __m256i
12153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
12156 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12157 (__v4di) __W,
12158 (__mmask8) __U);
12161 extern __inline __m256i
12162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12163 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
12165 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12166 (__v8si) __W,
12167 (__mmask8) __U);
12170 extern __inline __m128d
12171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12172 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
12174 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12175 (__v2df) __W,
12176 (__mmask8) __U);
12179 extern __inline __m128
12180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12181 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
12183 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12184 (__v4sf) __W,
12185 (__mmask8) __U);
12188 extern __inline __m128i
12189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
12192 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12193 (__v2di) __W,
12194 (__mmask8) __U);
12197 extern __inline __m128i
12198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
12201 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12202 (__v4si) __W,
12203 (__mmask8) __U);
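/* Illustrative usage sketch (editorial addition, not part of this header):
   mask_blend is a pure per-lane select driven by a mask register; lanes
   whose mask bit is set come from the last operand, the rest from the
   first vector operand.  Hypothetical helper; assumes <immintrin.h> and
   -mavx512vl.  */

static inline __m256d
select_per_lane (__mmask8 k, __m256d a, __m256d b)
{
  /* dst[i] = k[i] ? b[i] : a[i] for each 64-bit lane.  */
  return _mm256_mask_blend_pd (k, a, b);
}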
12206 extern __inline __mmask8
12207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
12210 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12211 (__v4di) __Y, __P,
12212 (__mmask8) -1);
12215 extern __inline __mmask8
12216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
12219 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12220 (__v8si) __Y, __P,
12221 (__mmask8) -1);
12224 extern __inline __mmask8
12225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12226 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
12228 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12229 (__v4di) __Y, __P,
12230 (__mmask8) -1);
12233 extern __inline __mmask8
12234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
12237 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12238 (__v8si) __Y, __P,
12239 (__mmask8) -1);
12242 extern __inline __mmask8
12243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
12246 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12247 (__v4df) __Y, __P,
12248 (__mmask8) -1);
12251 extern __inline __mmask8
12252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
12255 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12256 (__v8sf) __Y, __P,
12257 (__mmask8) -1);
12260 extern __inline __mmask8
12261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12262 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12263 const int __P)
12265 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12266 (__v4di) __Y, __P,
12267 (__mmask8) __U);
12270 extern __inline __mmask8
12271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12273 const int __P)
12275 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12276 (__v8si) __Y, __P,
12277 (__mmask8) __U);
12280 extern __inline __mmask8
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12283 const int __P)
12285 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12286 (__v4di) __Y, __P,
12287 (__mmask8) __U);
12290 extern __inline __mmask8
12291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12293 const int __P)
12295 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12296 (__v8si) __Y, __P,
12297 (__mmask8) __U);
12300 extern __inline __mmask8
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12303 const int __P)
12305 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12306 (__v4df) __Y, __P,
12307 (__mmask8) __U);
12310 extern __inline __mmask8
12311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12313 const int __P)
12315 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12316 (__v8sf) __Y, __P,
12317 (__mmask8) __U);
12320 extern __inline __mmask8
12321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
12324 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12325 (__v2di) __Y, __P,
12326 (__mmask8) -1);
12329 extern __inline __mmask8
12330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12331 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
12333 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12334 (__v4si) __Y, __P,
12335 (__mmask8) -1);
12338 extern __inline __mmask8
12339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
12342 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12343 (__v2di) __Y, __P,
12344 (__mmask8) -1);
12347 extern __inline __mmask8
12348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12349 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
12351 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12352 (__v4si) __Y, __P,
12353 (__mmask8) -1);
12356 extern __inline __mmask8
12357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12358 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
12360 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12361 (__v2df) __Y, __P,
12362 (__mmask8) -1);
12365 extern __inline __mmask8
12366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12367 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
12369 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12370 (__v4sf) __Y, __P,
12371 (__mmask8) -1);
12374 extern __inline __mmask8
12375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12376 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12377 const int __P)
12379 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12380 (__v2di) __Y, __P,
12381 (__mmask8) __U);
12384 extern __inline __mmask8
12385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12387 const int __P)
12389 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12390 (__v4si) __Y, __P,
12391 (__mmask8) __U);
12394 extern __inline __mmask8
12395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12396 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12397 const int __P)
12399 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12400 (__v2di) __Y, __P,
12401 (__mmask8) __U);
12404 extern __inline __mmask8
12405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12407 const int __P)
12409 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12410 (__v4si) __Y, __P,
12411 (__mmask8) __U);
12414 extern __inline __mmask8
12415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12416 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12417 const int __P)
12419 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12420 (__v2df) __Y, __P,
12421 (__mmask8) __U);
12424 extern __inline __mmask8
12425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12426 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12427 const int __P)
12429 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12430 (__v4sf) __Y, __P,
12431 (__mmask8) __U);
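/* Illustrative usage sketch (editorial addition, not part of this header):
   the cmp intrinsics return a mask register value instead of a vector, and
   the mask_ variants additionally AND the comparison result with an
   incoming mask, so tests compose cheaply.  _MM_CMPINT_LT and _CMP_LT_OS
   are the usual predicate constants from the AVX-512F and AVX headers.
   Hypothetical helper; assumes <immintrin.h> and -mavx512vl.  */

static inline __mmask8
both_below (__m256i xi, __m256i yi, __m256 xf, __m256 yf)
{
  __mmask8 ki = _mm256_cmp_epi32_mask (xi, yi, _MM_CMPINT_LT);
  /* Only lanes that already passed the integer test can be set in the
     final mask.  */
  return _mm256_mask_cmp_ps_mask (ki, xf, yf, _CMP_LT_OS);
}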
12434 extern __inline __m256d
12435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12436 _mm256_permutex_pd (__m256d __X, const int __M)
12438 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12439 (__v4df)
12440 _mm256_undefined_pd (),
12441 (__mmask8) -1);
12444 #else
12445 #define _mm256_permutex_pd(X, M) \
12446 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
12447 (__v4df)(__m256d) \
12448 _mm256_undefined_pd (), \
12449 (__mmask8)-1))
12451 #define _mm256_permutex_epi64(X, I) \
12452 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12453 (int)(I), \
12454 (__v4di)(__m256i) \
12455 (_mm256_setzero_si256 ()),\
12456 (__mmask8) -1))
12458 #define _mm256_maskz_permutex_epi64(M, X, I) \
12459 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12460 (int)(I), \
12461 (__v4di)(__m256i) \
12462 (_mm256_setzero_si256 ()),\
12463 (__mmask8)(M)))
12465 #define _mm256_mask_permutex_epi64(W, M, X, I) \
12466 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
12467 (int)(I), \
12468 (__v4di)(__m256i)(W), \
12469 (__mmask8)(M)))
12471 #define _mm256_insertf32x4(X, Y, C) \
12472 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12473 (__v4sf)(__m128) (Y), (int) (C), \
12474 (__v8sf)(__m256)_mm256_setzero_ps (), \
12475 (__mmask8)-1))
12477 #define _mm256_mask_insertf32x4(W, U, X, Y, C) \
12478 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12479 (__v4sf)(__m128) (Y), (int) (C), \
12480 (__v8sf)(__m256)(W), \
12481 (__mmask8)(U)))
12483 #define _mm256_maskz_insertf32x4(U, X, Y, C) \
12484 ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
12485 (__v4sf)(__m128) (Y), (int) (C), \
12486 (__v8sf)(__m256)_mm256_setzero_ps (), \
12487 (__mmask8)(U)))
12489 #define _mm256_inserti32x4(X, Y, C) \
12490 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12491 (__v4si)(__m128i) (Y), (int) (C), \
12492 (__v8si)(__m256i)_mm256_setzero_si256 (), \
12493 (__mmask8)-1))
12495 #define _mm256_mask_inserti32x4(W, U, X, Y, C) \
12496 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12497 (__v4si)(__m128i) (Y), (int) (C), \
12498 (__v8si)(__m256i)(W), \
12499 (__mmask8)(U)))
12501 #define _mm256_maskz_inserti32x4(U, X, Y, C) \
12502 ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
12503 (__v4si)(__m128i) (Y), (int) (C), \
12504 (__v8si)(__m256i)_mm256_setzero_si256 (), \
12505 (__mmask8)(U)))
12507 #define _mm256_extractf32x4_ps(X, C) \
12508 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12509 (int) (C), \
12510 (__v4sf)(__m128)_mm_setzero_ps (), \
12511 (__mmask8)-1))
12513 #define _mm256_mask_extractf32x4_ps(W, U, X, C) \
12514 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12515 (int) (C), \
12516 (__v4sf)(__m128)(W), \
12517 (__mmask8)(U)))
12519 #define _mm256_maskz_extractf32x4_ps(U, X, C) \
12520 ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
12521 (int) (C), \
12522 (__v4sf)(__m128)_mm_setzero_ps (), \
12523 (__mmask8)(U)))
12525 #define _mm256_extracti32x4_epi32(X, C) \
12526 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12527 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
12529 #define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
12530 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12531 (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
12533 #define _mm256_maskz_extracti32x4_epi32(U, X, C) \
12534 ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
12535 (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
12537 #define _mm256_shuffle_i64x2(X, Y, C) \
12538 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12539 (__v4di)(__m256i)(Y), (int)(C), \
12540 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12541 (__mmask8)-1))
12543 #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
12544 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12545 (__v4di)(__m256i)(Y), (int)(C), \
12546 (__v4di)(__m256i)(W),\
12547 (__mmask8)(U)))
12549 #define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
12550 ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
12551 (__v4di)(__m256i)(Y), (int)(C), \
12552 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12553 (__mmask8)(U)))
12555 #define _mm256_shuffle_i32x4(X, Y, C) \
12556 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12557 (__v8si)(__m256i)(Y), (int)(C), \
12558 (__v8si)(__m256i) \
12559 _mm256_setzero_si256 (), \
12560 (__mmask8)-1))
12562 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
12563 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12564 (__v8si)(__m256i)(Y), (int)(C), \
12565 (__v8si)(__m256i)(W), \
12566 (__mmask8)(U)))
12568 #define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
12569 ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
12570 (__v8si)(__m256i)(Y), (int)(C), \
12571 (__v8si)(__m256i) \
12572 _mm256_setzero_si256 (), \
12573 (__mmask8)(U)))
12575 #define _mm256_shuffle_f64x2(X, Y, C) \
12576 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12577 (__v4df)(__m256d)(Y), (int)(C), \
12578 (__v4df)(__m256d)_mm256_setzero_pd (),\
12579 (__mmask8)-1))
12581 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
12582 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12583 (__v4df)(__m256d)(Y), (int)(C), \
12584 (__v4df)(__m256d)(W), \
12585 (__mmask8)(U)))
12587 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
12588 ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
12589 (__v4df)(__m256d)(Y), (int)(C), \
12590 (__v4df)(__m256d)_mm256_setzero_pd (),\
12591 (__mmask8)(U)))
12593 #define _mm256_shuffle_f32x4(X, Y, C) \
12594 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12595 (__v8sf)(__m256)(Y), (int)(C), \
12596 (__v8sf)(__m256)_mm256_setzero_ps (), \
12597 (__mmask8)-1))
12599 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
12600 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12601 (__v8sf)(__m256)(Y), (int)(C), \
12602 (__v8sf)(__m256)(W), \
12603 (__mmask8)(U)))
12605 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
12606 ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
12607 (__v8sf)(__m256)(Y), (int)(C), \
12608 (__v8sf)(__m256)_mm256_setzero_ps (), \
12609 (__mmask8)(U)))
12611 #define _mm256_mask_shuffle_pd(W, U, A, B, C) \
12612 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12613 (__v4df)(__m256d)(B), (int)(C), \
12614 (__v4df)(__m256d)(W), \
12615 (__mmask8)(U)))
12617 #define _mm256_maskz_shuffle_pd(U, A, B, C) \
12618 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
12619 (__v4df)(__m256d)(B), (int)(C), \
12620 (__v4df)(__m256d) \
12621 _mm256_setzero_pd (), \
12622 (__mmask8)(U)))
12624 #define _mm_mask_shuffle_pd(W, U, A, B, C) \
12625 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12626 (__v2df)(__m128d)(B), (int)(C), \
12627 (__v2df)(__m128d)(W), \
12628 (__mmask8)(U)))
12630 #define _mm_maskz_shuffle_pd(U, A, B, C) \
12631 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
12632 (__v2df)(__m128d)(B), (int)(C), \
12633 (__v2df)(__m128d)_mm_setzero_pd (), \
12634 (__mmask8)(U)))
12636 #define _mm256_mask_shuffle_ps(W, U, A, B, C) \
12637 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12638 (__v8sf)(__m256)(B), (int)(C), \
12639 (__v8sf)(__m256)(W), \
12640 (__mmask8)(U)))
12642 #define _mm256_maskz_shuffle_ps(U, A, B, C) \
12643 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
12644 (__v8sf)(__m256)(B), (int)(C), \
12645 (__v8sf)(__m256)_mm256_setzero_ps (),\
12646 (__mmask8)(U)))
12648 #define _mm_mask_shuffle_ps(W, U, A, B, C) \
12649 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12650 (__v4sf)(__m128)(B), (int)(C), \
12651 (__v4sf)(__m128)(W), \
12652 (__mmask8)(U)))
12654 #define _mm_maskz_shuffle_ps(U, A, B, C) \
12655 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12656 (__v4sf)(__m128)(B), (int)(C), \
12657 (__v4sf)(__m128)_mm_setzero_ps (), \
12658 (__mmask8)(U)))
12660 #define _mm256_fixupimm_pd(X, Y, Z, C) \
12661 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12662 (__v4df)(__m256d)(Y), \
12663 (__v4di)(__m256i)(Z), (int)(C), \
12664 (__mmask8)(-1)))
12666 #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
12667 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
12668 (__v4df)(__m256d)(Y), \
12669 (__v4di)(__m256i)(Z), (int)(C), \
12670 (__mmask8)(U)))
12672 #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
12673 ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
12674 (__v4df)(__m256d)(Y), \
12675 (__v4di)(__m256i)(Z), (int)(C),\
12676 (__mmask8)(U)))
12678 #define _mm256_fixupimm_ps(X, Y, Z, C) \
12679 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12680 (__v8sf)(__m256)(Y), \
12681 (__v8si)(__m256i)(Z), (int)(C), \
12682 (__mmask8)(-1)))
12685 #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
12686 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
12687 (__v8sf)(__m256)(Y), \
12688 (__v8si)(__m256i)(Z), (int)(C), \
12689 (__mmask8)(U)))
12691 #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
12692 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
12693 (__v8sf)(__m256)(Y), \
12694 (__v8si)(__m256i)(Z), (int)(C),\
12695 (__mmask8)(U)))
12697 #define _mm_fixupimm_pd(X, Y, Z, C) \
12698 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12699 (__v2df)(__m128d)(Y), \
12700 (__v2di)(__m128i)(Z), (int)(C), \
12701 (__mmask8)(-1)))
12704 #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
12705 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
12706 (__v2df)(__m128d)(Y), \
12707 (__v2di)(__m128i)(Z), (int)(C), \
12708 (__mmask8)(U)))
12710 #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
12711 ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
12712 (__v2df)(__m128d)(Y), \
12713 (__v2di)(__m128i)(Z), (int)(C),\
12714 (__mmask8)(U)))
12716 #define _mm_fixupimm_ps(X, Y, Z, C) \
12717 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12718 (__v4sf)(__m128)(Y), \
12719 (__v4si)(__m128i)(Z), (int)(C), \
12720 (__mmask8)(-1)))
12722 #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
12723 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
12724 (__v4sf)(__m128)(Y), \
12725 (__v4si)(__m128i)(Z), (int)(C),\
12726 (__mmask8)(U)))
12728 #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
12729 ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
12730 (__v4sf)(__m128)(Y), \
12731 (__v4si)(__m128i)(Z), (int)(C),\
12732 (__mmask8)(U)))
12734 #define _mm256_mask_srli_epi32(W, U, A, B) \
12735 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12736 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
12738 #define _mm256_maskz_srli_epi32(U, A, B) \
12739 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
12740 (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
12742 #define _mm_mask_srli_epi32(W, U, A, B) \
12743 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12744 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
12746 #define _mm_maskz_srli_epi32(U, A, B) \
12747 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
12748 (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
12750 #define _mm256_mask_srli_epi64(W, U, A, B) \
12751 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12752 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
12754 #define _mm256_maskz_srli_epi64(U, A, B) \
12755 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
12756 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
12758 #define _mm_mask_srli_epi64(W, U, A, B) \
12759 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12760 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
12762 #define _mm_maskz_srli_epi64(U, A, B) \
12763 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
12764 (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
12766 #define _mm256_mask_slli_epi32(W, U, X, C) \
12767 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12768 (__v8si)(__m256i)(W), \
12769 (__mmask8)(U)))
12771 #define _mm256_maskz_slli_epi32(U, X, C) \
12772 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
12773 (__v8si)(__m256i)_mm256_setzero_si256 (), \
12774 (__mmask8)(U)))
12776 #define _mm256_mask_slli_epi64(W, U, X, C) \
12777 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12778 (__v4di)(__m256i)(W), \
12779 (__mmask8)(U)))
12781 #define _mm256_maskz_slli_epi64(U, X, C) \
12782 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
12783 (__v4di)(__m256i)_mm256_setzero_si256 (), \
12784 (__mmask8)(U)))
12786 #define _mm_mask_slli_epi32(W, U, X, C) \
12787 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12788 (__v4si)(__m128i)(W),\
12789 (__mmask8)(U)))
12791 #define _mm_maskz_slli_epi32(U, X, C) \
12792 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
12793 (__v4si)(__m128i)_mm_setzero_si128 (),\
12794 (__mmask8)(U)))
12796 #define _mm_mask_slli_epi64(W, U, X, C) \
12797 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12798 (__v2di)(__m128i)(W),\
12799 (__mmask8)(U)))
12801 #define _mm_maskz_slli_epi64(U, X, C) \
12802 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
12803 (__v2di)(__m128i)_mm_setzero_si128 (),\
12804 (__mmask8)(U)))
12806 #define _mm256_ternarylogic_epi64(A, B, C, I) \
12807 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12808 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
12810 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
12811 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
12812 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12814 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
12815 ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
12816 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
12818 #define _mm256_ternarylogic_epi32(A, B, C, I) \
12819 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12820 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
12822 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
12823 ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
12824 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12826 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
12827 ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
12828 (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12830 #define _mm_ternarylogic_epi64(A, B, C, I) \
12831 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12832 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
12834 #define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
12835 ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
12836 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12838 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
12839 ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
12840 (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
12842 #define _mm_ternarylogic_epi32(A, B, C, I) \
12843 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12844 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
12846 #define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
12847 ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
12848 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
12850 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
12851 ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
12852 (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
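/* Illustrative usage sketch (editorial addition, not part of this header):
   the ternarylogic immediate is an eight-entry truth table indexed by one
   bit from each of the three sources, so any three-input bitwise function
   is a single instruction; 0x96 is the table for a ^ b ^ c and 0xe8 the
   bitwise majority function.  Hypothetical helpers; assume <immintrin.h>
   and -mavx512vl.  */

static inline __m256i
xor3 (__m256i a, __m256i b, __m256i c)
{
  return _mm256_ternarylogic_epi64 (a, b, c, 0x96);
}

static inline __m256i
majority3 (__m256i a, __m256i b, __m256i c)
{
  return _mm256_ternarylogic_epi64 (a, b, c, 0xe8);
}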
12854 #define _mm256_roundscale_ps(A, B) \
12855 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12856 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
12858 #define _mm256_mask_roundscale_ps(W, U, A, B) \
12859 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12860 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
12862 #define _mm256_maskz_roundscale_ps(U, A, B) \
12863 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
12864 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
12866 #define _mm256_roundscale_pd(A, B) \
12867 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12868 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
12870 #define _mm256_mask_roundscale_pd(W, U, A, B) \
12871 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12872 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
12874 #define _mm256_maskz_roundscale_pd(U, A, B) \
12875 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
12876 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
12878 #define _mm_roundscale_ps(A, B) \
12879 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12880 (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
12882 #define _mm_mask_roundscale_ps(W, U, A, B) \
12883 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12884 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
12886 #define _mm_maskz_roundscale_ps(U, A, B) \
12887 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
12888 (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
12890 #define _mm_roundscale_pd(A, B) \
12891 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12892 (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
12894 #define _mm_mask_roundscale_pd(W, U, A, B) \
12895 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12896 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
12898 #define _mm_maskz_roundscale_pd(U, A, B) \
12899 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
12900 (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
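/* Illustrative usage sketch (editorial addition, not part of this header):
   in the roundscale immediate the low bits select the rounding mode and
   the high nibble the number of binary fraction bits to keep, so 0x01 is a
   plain floor and 0x12 rounds up to the next multiple of 0.5.
   Hypothetical helpers; assume <immintrin.h> and -mavx512vl.  */

static inline __m256
floor8 (__m256 v)
{
  return _mm256_roundscale_ps (v, 0x01);
}

static inline __m256
ceil_to_halves (__m256 v)
{
  return _mm256_roundscale_ps (v, 0x12);
}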
12902 #define _mm256_getmant_ps(X, B, C) \
12903 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12904 (int)(((C)<<2) | (B)), \
12905 (__v8sf)(__m256)_mm256_setzero_ps (), \
12906 (__mmask8)-1))
12908 #define _mm256_mask_getmant_ps(W, U, X, B, C) \
12909 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12910 (int)(((C)<<2) | (B)), \
12911 (__v8sf)(__m256)(W), \
12912 (__mmask8)(U)))
12914 #define _mm256_maskz_getmant_ps(U, X, B, C) \
12915 ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
12916 (int)(((C)<<2) | (B)), \
12917 (__v8sf)(__m256)_mm256_setzero_ps (), \
12918 (__mmask8)(U)))
12920 #define _mm_getmant_ps(X, B, C) \
12921 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12922 (int)(((C)<<2) | (B)), \
12923 (__v4sf)(__m128)_mm_setzero_ps (), \
12924 (__mmask8)-1))
12926 #define _mm_mask_getmant_ps(W, U, X, B, C) \
12927 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12928 (int)(((C)<<2) | (B)), \
12929 (__v4sf)(__m128)(W), \
12930 (__mmask8)(U)))
12932 #define _mm_maskz_getmant_ps(U, X, B, C) \
12933 ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
12934 (int)(((C)<<2) | (B)), \
12935 (__v4sf)(__m128)_mm_setzero_ps (), \
12936 (__mmask8)(U)))
12938 #define _mm256_getmant_pd(X, B, C) \
12939 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12940 (int)(((C)<<2) | (B)), \
12941 (__v4df)(__m256d)_mm256_setzero_pd (),\
12942 (__mmask8)-1))
12944 #define _mm256_mask_getmant_pd(W, U, X, B, C) \
12945 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12946 (int)(((C)<<2) | (B)), \
12947 (__v4df)(__m256d)(W), \
12948 (__mmask8)(U)))
12950 #define _mm256_maskz_getmant_pd(U, X, B, C) \
12951 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
12952 (int)(((C)<<2) | (B)), \
12953 (__v4df)(__m256d)_mm256_setzero_pd (),\
12954 (__mmask8)(U)))
12956 #define _mm_getmant_pd(X, B, C) \
12957 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12958 (int)(((C)<<2) | (B)), \
12959 (__v2df)(__m128d)_mm_setzero_pd (), \
12960 (__mmask8)-1))
12962 #define _mm_mask_getmant_pd(W, U, X, B, C) \
12963 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12964 (int)(((C)<<2) | (B)), \
12965 (__v2df)(__m128d)(W), \
12966 (__mmask8)(U)))
12968 #define _mm_maskz_getmant_pd(U, X, B, C) \
12969 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
12970 (int)(((C)<<2) | (B)), \
12971 (__v2df)(__m128d)_mm_setzero_pd (), \
12972 (__mmask8)(U)))
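/* Illustrative usage sketch (editorial addition, not part of this header):
   getmant extracts each element's mantissa normalised into a chosen
   interval, which together with getexp gives a vectorised frexp.  The
   selector arguments below are assumed to be the _MM_MANT_* enumerators
   from the AVX-512F header.  Hypothetical helper; assumes <immintrin.h>
   and -mavx512vl.  */

static inline __m256d
mantissa_in_1_2 (__m256d v)
{
  /* Mantissas normalised to [1, 2), keeping the sign of the source.  */
  return _mm256_getmant_pd (v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}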
12974 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12975 (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD, \
12976 (void const *)ADDR, \
12977 (__v8si)(__m256i)INDEX, \
12978 (__mmask8)MASK, (int)SCALE)
12980 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12981 (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD, \
12982 (void const *)ADDR, \
12983 (__v4si)(__m128i)INDEX, \
12984 (__mmask8)MASK, (int)SCALE)
12986 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12987 (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD, \
12988 (void const *)ADDR, \
12989 (__v4si)(__m128i)INDEX, \
12990 (__mmask8)MASK, (int)SCALE)
12992 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12993 (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD, \
12994 (void const *)ADDR, \
12995 (__v4si)(__m128i)INDEX, \
12996 (__mmask8)MASK, (int)SCALE)
12998 #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12999 (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD, \
13000 (void const *)ADDR, \
13001 (__v4di)(__m256i)INDEX, \
13002 (__mmask8)MASK, (int)SCALE)
13004 #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
13005 (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD, \
13006 (void const *)ADDR, \
13007 (__v2di)(__m128i)INDEX, \
13008 (__mmask8)MASK, (int)SCALE)
13010 #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
13011 (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD, \
13012 (void const *)ADDR, \
13013 (__v4di)(__m256i)INDEX, \
13014 (__mmask8)MASK, (int)SCALE)
13016 #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
13017 (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD, \
13018 (void const *)ADDR, \
13019 (__v2di)(__m128i)INDEX, \
13020 (__mmask8)MASK, (int)SCALE)
13022 #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13023 (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD, \
13024 (void const *)ADDR, \
13025 (__v8si)(__m256i)INDEX, \
13026 (__mmask8)MASK, (int)SCALE)
13028 #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13029 (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD, \
13030 (void const *)ADDR, \
13031 (__v4si)(__m128i)INDEX, \
13032 (__mmask8)MASK, (int)SCALE)
13034 #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13035 (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD, \
13036 (void const *)ADDR, \
13037 (__v4si)(__m128i)INDEX, \
13038 (__mmask8)MASK, (int)SCALE)
13040 #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13041 (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD, \
13042 (void const *)ADDR, \
13043 (__v4si)(__m128i)INDEX, \
13044 (__mmask8)MASK, (int)SCALE)
13046 #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13047 (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD, \
13048 (void const *)ADDR, \
13049 (__v4di)(__m256i)INDEX, \
13050 (__mmask8)MASK, (int)SCALE)
13052 #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
13053 (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD, \
13054 (void const *)ADDR, \
13055 (__v2di)(__m128i)INDEX, \
13056 (__mmask8)MASK, (int)SCALE)
13058 #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13059 (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD, \
13060 (void const *)ADDR, \
13061 (__v4di)(__m256i)INDEX, \
13062 (__mmask8)MASK, (int)SCALE)
13064 #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
13065 (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD, \
13066 (void const *)ADDR, \
13067 (__v2di)(__m128i)INDEX, \
13068 (__mmask8)MASK, (int)SCALE)
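/* Illustrative usage sketch (editorial addition, not part of this header):
   a masked gather loads only the elements whose mask bit is set; the
   remaining result lanes keep the pass-through operand and their memory
   locations are not read.  Hypothetical helper; assumes <immintrin.h> and
   -mavx512vl.  */

static inline __m256d
gather_doubles (const double *base, __m128i idx, __mmask8 k)
{
  /* Scale 8: idx holds element indices into an array of doubles;
     unselected lanes come out as 0.0 here.  */
  return _mm256_mmask_i32gather_pd (_mm256_setzero_pd (), k, idx, base, 8);
}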
13070 #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
13071 __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)0xFF, \
13072 (__v8si)(__m256i)INDEX, \
13073 (__v8sf)(__m256)V1, (int)SCALE)
13075 #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13076 __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)MASK, \
13077 (__v8si)(__m256i)INDEX, \
13078 (__v8sf)(__m256)V1, (int)SCALE)
13080 #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
13081 __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)0xFF, \
13082 (__v4si)(__m128i)INDEX, \
13083 (__v4sf)(__m128)V1, (int)SCALE)
13085 #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13086 __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)MASK, \
13087 (__v4si)(__m128i)INDEX, \
13088 (__v4sf)(__m128)V1, (int)SCALE)
13090 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
13091 __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)0xFF, \
13092 (__v4si)(__m128i)INDEX, \
      (__v4df)(__m256d)V1, (int)SCALE)

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)MASK, \
      (__v4si)(__m128i)INDEX, \
      (__v4df)(__m256d)V1, (int)SCALE)

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)0xFF, \
      (__v4si)(__m128i)INDEX, \
      (__v2df)(__m128d)V1, (int)SCALE)

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)MASK, \
      (__v4si)(__m128i)INDEX, \
      (__v2df)(__m128d)V1, (int)SCALE)

#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)0xFF, \
      (__v4di)(__m256i)INDEX, \
      (__v4sf)(__m128)V1, (int)SCALE)

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)MASK, \
      (__v4di)(__m256i)INDEX, \
      (__v4sf)(__m128)V1, (int)SCALE)

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)0xFF, \
      (__v2di)(__m128i)INDEX, \
      (__v4sf)(__m128)V1, (int)SCALE)

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)MASK, \
      (__v2di)(__m128i)INDEX, \
      (__v4sf)(__m128)V1, (int)SCALE)

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)0xFF, \
      (__v4di)(__m256i)INDEX, \
      (__v4df)(__m256d)V1, (int)SCALE)

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)MASK, \
      (__v4di)(__m256i)INDEX, \
      (__v4df)(__m256d)V1, (int)SCALE)

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)0xFF, \
      (__v2di)(__m128i)INDEX, \
      (__v2df)(__m128d)V1, (int)SCALE)

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)MASK, \
      (__v2di)(__m128i)INDEX, \
      (__v2df)(__m128d)V1, (int)SCALE)

#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)0xFF, \
      (__v8si)(__m256i)INDEX, \
      (__v8si)(__m256i)V1, (int)SCALE)

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)MASK, \
      (__v8si)(__m256i)INDEX, \
      (__v8si)(__m256i)V1, (int)SCALE)

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)0xFF, \
      (__v4si)(__m128i)INDEX, \
      (__v4si)(__m128i)V1, (int)SCALE)

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)MASK, \
      (__v4si)(__m128i)INDEX, \
      (__v4si)(__m128i)V1, (int)SCALE)

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)0xFF, \
      (__v4si)(__m128i)INDEX, \
      (__v4di)(__m256i)V1, (int)SCALE)

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)MASK, \
      (__v4si)(__m128i)INDEX, \
      (__v4di)(__m256i)V1, (int)SCALE)

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)0xFF, \
      (__v4si)(__m128i)INDEX, \
      (__v2di)(__m128i)V1, (int)SCALE)

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)MASK, \
      (__v4si)(__m128i)INDEX, \
      (__v2di)(__m128i)V1, (int)SCALE)

#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)0xFF, \
      (__v4di)(__m256i)INDEX, \
      (__v4si)(__m128i)V1, (int)SCALE)

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)MASK, \
      (__v4di)(__m256i)INDEX, \
      (__v4si)(__m128i)V1, (int)SCALE)

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)0xFF, \
      (__v2di)(__m128i)INDEX, \
      (__v4si)(__m128i)V1, (int)SCALE)

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)MASK, \
      (__v2di)(__m128i)INDEX, \
      (__v4si)(__m128i)V1, (int)SCALE)

#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)0xFF, \
      (__v4di)(__m256i)INDEX, \
      (__v4di)(__m256i)V1, (int)SCALE)

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)MASK, \
      (__v4di)(__m256i)INDEX, \
      (__v4di)(__m256i)V1, (int)SCALE)

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)0xFF, \
      (__v2di)(__m128i)INDEX, \
      (__v2di)(__m128i)V1, (int)SCALE)

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)MASK, \
      (__v2di)(__m128i)INDEX, \
      (__v2di)(__m128i)V1, (int)SCALE)

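/* Usage sketch for the scatter macros above (illustrative only; the array,
   index and mask values below are hypothetical).  Each selected element
   V1[i] is stored to (char *)ADDR + INDEX[i] * SCALE; elements whose mask
   bit is clear are not stored.  Compile user code with -mavx512vl.

     double base[8] = { 0.0 };
     __m128i idx = _mm_setr_epi32 (0, 2, 4, 6);
     __m256d val = _mm256_set1_pd (1.0);
     _mm256_mask_i32scatter_pd (base, 0x5, idx, val, 8);
     // mask bits 0 and 2 are set, so base[0] and base[4] become 1.0
*/
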
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
      (__v8si)(__m256i) \
      _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
      (__v8si)(__m256i) \
      _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

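/* Usage sketch for the rotate macros above (hypothetical values).  B is an
   immediate bit count; the merge-masking forms keep unselected lanes from W,
   the zero-masking forms clear them.

     __m256i a = _mm256_set1_epi32 (0x12345678);
     __m256i r = _mm256_rol_epi32 (a, 8);               // each lane: 0x34567812
     __m256i w = _mm256_setzero_si256 ();
     __m256i m = _mm256_mask_rol_epi32 (w, 0x0F, a, 8); // low 4 lanes rotated, high 4 stay 0
*/
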
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))

#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

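/* Usage sketch for the alignr macros above (hypothetical values).  The two
   sources are concatenated with X as the high half, the whole is shifted
   right by C elements, and the low half is returned; the masked forms then
   merge unselected lanes from W or zero them.

     __m128i x = _mm_set_epi64x (3, 2);                   // lanes {2, 3}
     __m128i y = _mm_set_epi64x (1, 0);                   // lanes {0, 1}
     __m128i r = _mm_alignr_epi64 (x, y, 1);              // lanes {1, 2}
     __m128i w = _mm_setzero_si128 ();
     __m128i m = _mm_mask_alignr_epi64 (w, 0x1, x, y, 1); // lanes {1, 0}: lane 1 taken from w
*/
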
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
      (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
      (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
      (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
      (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))

#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
      (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
      (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
      (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
      (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
      (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
      (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))

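/* Usage sketch for the shift macros above (hypothetical values).  Unlike
   AVX2, AVX-512VL provides a 64-bit arithmetic right shift, so the sign bit
   is replicated into the vacated positions.

     __m256i a = _mm256_set1_epi64x (-16);
     __m256i r = _mm256_srai_epi64 (a, 2);   // each lane: -4
*/
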
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
      (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
      (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))

#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))

#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
      (__v4df) (__W), \
      (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
      (__v8sf) (__W), \
      (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
      (__v4di) (__W), \
      (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
      (__v8si) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
      (__v2df) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
      (__v4sf) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
      (__v2di) (__W), \
      (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
      (__v4si) (__W), \
      (__mmask8) (__U)))

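/* Usage sketch for the blend macros above (hypothetical values).  Lanes
   whose mask bit is set come from the second vector operand, the rest from
   the first.

     __m256d a = _mm256_set1_pd (1.0);
     __m256d b = _mm256_set1_pd (2.0);
     __m256d r = _mm256_mask_blend_pd (0x5, a, b);
     // lanes 0 and 2 take 2.0 from b, lanes 1 and 3 keep 1.0 from a
*/
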
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M)))

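/* Usage sketch for the compare macros above (hypothetical values).  P is an
   immediate predicate: one of the _MM_CMPINT_* values for the integer
   compares, or a _CMP_* value for the pd/ps compares; the result is a bit
   mask with one bit per element.

     __m256i x = _mm256_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7);
     __m256i y = _mm256_set1_epi32 (4);
     __mmask8 m = _mm256_cmp_epi32_mask (x, y, _MM_CMPINT_LT);
     // elements 0..3 are below 4, so m == 0x0F
*/
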
#endif

#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A))

#ifdef __DISABLE_AVX512VL__
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */

#endif /* _AVX512VLINTRIN_H_INCLUDED */