/* Initial support for AVX-512{VL,BW,DQ}.
   gcc/config/i386/avx512vldqintrin.h  */
/* Copyright (C) 2014
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
25 #ifndef _IMMINTRIN_H_INCLUDED
26 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
27 #endif
29 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
30 #define _AVX512VLDQINTRIN_H_INCLUDED
32 #if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
33 #pragma GCC push_options
34 #pragma GCC target("avx512vl,avx512dq")
35 #define __DISABLE_AVX512VLDQ__
36 #endif /* __AVX512VLDQ__ */
38 extern __inline __m256i
39 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
40 _mm256_cvttpd_epi64 (__m256d __A)
42 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
43 (__v4di)
44 _mm256_setzero_si256 (),
45 (__mmask8) -1);
48 extern __inline __m256i
49 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
52 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
53 (__v4di) __W,
54 (__mmask8) __U);
57 extern __inline __m256i
58 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
61 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
62 (__v4di)
63 _mm256_setzero_si256 (),
64 (__mmask8) __U);
67 extern __inline __m128i
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _mm_cvttpd_epi64 (__m128d __A)
71 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
72 (__v2di)
73 _mm_setzero_di (),
74 (__mmask8) -1);
77 extern __inline __m128i
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
81 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
82 (__v2di) __W,
83 (__mmask8) __U);
86 extern __inline __m128i
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
90 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
91 (__v2di)
92 _mm_setzero_si128 (),
93 (__mmask8) __U);
96 extern __inline __m256i
97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98 _mm256_cvttpd_epu64 (__m256d __A)
100 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
101 (__v4di)
102 _mm256_setzero_si256 (),
103 (__mmask8) -1);
106 extern __inline __m256i
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
110 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
111 (__v4di) __W,
112 (__mmask8) __U);
115 extern __inline __m256i
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
119 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
120 (__v4di)
121 _mm256_setzero_si256 (),
122 (__mmask8) __U);
125 extern __inline __m128i
126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127 _mm_cvttpd_epu64 (__m128d __A)
129 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
130 (__v2di)
131 _mm_setzero_di (),
132 (__mmask8) -1);
135 extern __inline __m128i
136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
137 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
139 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
140 (__v2di) __W,
141 (__mmask8) __U);
144 extern __inline __m128i
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
148 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
149 (__v2di)
150 _mm_setzero_si128 (),
151 (__mmask8) __U);
154 extern __inline __m256i
155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156 _mm256_cvtpd_epi64 (__m256d __A)
158 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
159 (__v4di)
160 _mm256_setzero_si256 (),
161 (__mmask8) -1);
164 extern __inline __m256i
165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
166 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
168 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
169 (__v4di) __W,
170 (__mmask8) __U);
173 extern __inline __m256i
174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
175 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
177 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
178 (__v4di)
179 _mm256_setzero_si256 (),
180 (__mmask8) __U);
183 extern __inline __m128i
184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185 _mm_cvtpd_epi64 (__m128d __A)
187 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
188 (__v2di)
189 _mm_setzero_di (),
190 (__mmask8) -1);
193 extern __inline __m128i
194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
195 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
197 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
198 (__v2di) __W,
199 (__mmask8) __U);
202 extern __inline __m128i
203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
204 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
206 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
207 (__v2di)
208 _mm_setzero_si128 (),
209 (__mmask8) __U);
212 extern __inline __m256i
213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214 _mm256_cvtpd_epu64 (__m256d __A)
216 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
217 (__v4di)
218 _mm256_setzero_si256 (),
219 (__mmask8) -1);
222 extern __inline __m256i
223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
224 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
226 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
227 (__v4di) __W,
228 (__mmask8) __U);
231 extern __inline __m256i
232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
233 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
235 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
236 (__v4di)
237 _mm256_setzero_si256 (),
238 (__mmask8) __U);
241 extern __inline __m128i
242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
243 _mm_cvtpd_epu64 (__m128d __A)
245 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
246 (__v2di)
247 _mm_setzero_di (),
248 (__mmask8) -1);
251 extern __inline __m128i
252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
253 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
255 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
256 (__v2di) __W,
257 (__mmask8) __U);
260 extern __inline __m128i
261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
262 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
264 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
265 (__v2di)
266 _mm_setzero_si128 (),
267 (__mmask8) __U);
270 extern __inline __m256i
271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
272 _mm256_cvttps_epi64 (__m128 __A)
274 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
275 (__v4di)
276 _mm256_setzero_si256 (),
277 (__mmask8) -1);
280 extern __inline __m256i
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
284 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
285 (__v4di) __W,
286 (__mmask8) __U);
289 extern __inline __m256i
290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
293 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
294 (__v4di)
295 _mm256_setzero_si256 (),
296 (__mmask8) __U);
299 extern __inline __m128i
300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301 _mm_cvttps_epi64 (__m128 __A)
303 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
304 (__v2di)
305 _mm_setzero_di (),
306 (__mmask8) -1);
309 extern __inline __m128i
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
313 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
314 (__v2di) __W,
315 (__mmask8) __U);
318 extern __inline __m128i
319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
322 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
323 (__v2di)
324 _mm_setzero_di (),
325 (__mmask8) __U);
328 extern __inline __m256i
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 _mm256_cvttps_epu64 (__m128 __A)
332 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
333 (__v4di)
334 _mm256_setzero_si256 (),
335 (__mmask8) -1);
338 extern __inline __m256i
339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
340 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
342 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
343 (__v4di) __W,
344 (__mmask8) __U);
347 extern __inline __m256i
348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
349 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
351 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
352 (__v4di)
353 _mm256_setzero_si256 (),
354 (__mmask8) __U);
357 extern __inline __m128i
358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
359 _mm_cvttps_epu64 (__m128 __A)
361 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
362 (__v2di)
363 _mm_setzero_di (),
364 (__mmask8) -1);
367 extern __inline __m128i
368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
371 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
372 (__v2di) __W,
373 (__mmask8) __U);
376 extern __inline __m128i
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
380 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
381 (__v2di)
382 _mm_setzero_di (),
383 (__mmask8) __U);
386 extern __inline __m256d
387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388 _mm256_broadcast_f64x2 (__m128d __A)
390 __v4df __O;
391 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
392 __A, __O,
393 (__mmask8) -
397 extern __inline __m256d
398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
399 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
401 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
402 __A,
403 (__v4df)
404 __O, __M);
407 extern __inline __m256d
408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
411 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
412 __A,
413 (__v4df)
414 _mm256_setzero_ps (),
415 __M);
418 extern __inline __m256i
419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
420 _mm256_broadcast_i64x2 (__m128i __A)
422 __v4di __O;
423 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
424 __A, __O,
425 (__mmask8) -
429 extern __inline __m256i
430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
433 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
434 __A,
435 (__v4di)
436 __O, __M);
439 extern __inline __m256i
440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
441 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
443 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
444 __A,
445 (__v4di)
446 _mm256_setzero_si256 (),
447 __M);
450 extern __inline __m256
451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
452 _mm256_broadcast_f32x2 (__m128 __A)
454 __v8sf __O;
455 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
456 __O,
457 (__mmask16) -
461 extern __inline __m256
462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
463 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
465 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
466 (__v8sf) __O,
467 __M);
470 extern __inline __m256
471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
474 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
475 (__v8sf)
476 _mm256_setzero_ps (),
477 __M);
480 extern __inline __m256i
481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
482 _mm256_broadcast_i32x2 (__m128i __A)
484 __v8si __O;
485 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
486 __A, __O,
487 (__mmask8) -
491 extern __inline __m256i
492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
493 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
495 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
496 __A,
497 (__v8si)
498 __O, __M);
501 extern __inline __m256i
502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
503 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
505 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
506 __A,
507 (__v8si)
508 _mm256_setzero_si256 (),
509 __M);
512 extern __inline __m128i
513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
514 _mm_broadcast_i32x2 (__m128i __A)
516 __v4si __O;
517 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
518 __A, __O,
519 (__mmask8) -
523 extern __inline __m128i
524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
525 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
527 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
528 __A,
529 (__v4si)
530 __O, __M);
533 extern __inline __m128i
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
537 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
538 __A,
539 (__v4si)
540 _mm_setzero_si128 (),
541 __M);
544 extern __inline __m256i
545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
546 _mm256_mullo_epi64 (__m256i __A, __m256i __B)
548 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
549 (__v4di) __B,
550 (__v4di)
551 _mm256_setzero_si256 (),
552 (__mmask8) -1);
555 extern __inline __m256i
556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
557 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
558 __m256i __B)
560 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
561 (__v4di) __B,
562 (__v4di) __W,
563 (__mmask8) __U);
566 extern __inline __m256i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
570 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
571 (__v4di) __B,
572 (__v4di)
573 _mm256_setzero_si256 (),
574 (__mmask8) __U);
577 extern __inline __m128i
578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579 _mm_mullo_epi64 (__m128i __A, __m128i __B)
581 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
582 (__v2di) __B,
583 (__v2di)
584 _mm_setzero_di (),
585 (__mmask8) -1);
588 extern __inline __m128i
589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
591 __m128i __B)
593 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
594 (__v2di) __B,
595 (__v2di) __W,
596 (__mmask8) __U);
599 extern __inline __m128i
600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
601 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
603 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
604 (__v2di) __B,
605 (__v2di)
606 _mm_setzero_di (),
607 (__mmask8) __U);
610 extern __inline __m256d
611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
612 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
613 __m256d __B)
615 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
616 (__v4df) __B,
617 (__v4df) __W,
618 (__mmask8) __U);
621 extern __inline __m256d
622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
625 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
626 (__v4df) __B,
627 (__v4df)
628 _mm256_setzero_pd (),
629 (__mmask8) __U);
632 extern __inline __m128d
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
635 __m128d __B)
637 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
638 (__v2df) __B,
639 (__v2df) __W,
640 (__mmask8) __U);
643 extern __inline __m128d
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
647 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
648 (__v2df) __B,
649 (__v2df)
650 _mm_setzero_pd (),
651 (__mmask8) __U);
654 extern __inline __m256
655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
656 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
657 __m256 __B)
659 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
660 (__v8sf) __B,
661 (__v8sf) __W,
662 (__mmask8) __U);
665 extern __inline __m256
666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
667 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
669 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
670 (__v8sf) __B,
671 (__v8sf)
672 _mm256_setzero_ps (),
673 (__mmask8) __U);
676 extern __inline __m128
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
680 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
681 (__v4sf) __B,
682 (__v4sf) __W,
683 (__mmask8) __U);
686 extern __inline __m128
687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
690 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
691 (__v4sf) __B,
692 (__v4sf)
693 _mm_setzero_ps (),
694 (__mmask8) __U);
697 extern __inline __m256i
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm256_cvtps_epi64 (__m128 __A)
701 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
702 (__v4di)
703 _mm256_setzero_si256 (),
704 (__mmask8) -1);
707 extern __inline __m256i
708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
709 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
711 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
712 (__v4di) __W,
713 (__mmask8) __U);
716 extern __inline __m256i
717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
720 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
721 (__v4di)
722 _mm256_setzero_si256 (),
723 (__mmask8) __U);
726 extern __inline __m128i
727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728 _mm_cvtps_epi64 (__m128 __A)
730 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
731 (__v2di)
732 _mm_setzero_di (),
733 (__mmask8) -1);
736 extern __inline __m128i
737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
738 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
740 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
741 (__v2di) __W,
742 (__mmask8) __U);
745 extern __inline __m128i
746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
747 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
749 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
750 (__v2di)
751 _mm_setzero_di (),
752 (__mmask8) __U);
755 extern __inline __m256i
756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
757 _mm256_cvtps_epu64 (__m128 __A)
759 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
760 (__v4di)
761 _mm256_setzero_si256 (),
762 (__mmask8) -1);
765 extern __inline __m256i
766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
769 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
770 (__v4di) __W,
771 (__mmask8) __U);
774 extern __inline __m256i
775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
778 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
779 (__v4di)
780 _mm256_setzero_si256 (),
781 (__mmask8) __U);
784 extern __inline __m128i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm_cvtps_epu64 (__m128 __A)
788 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
789 (__v2di)
790 _mm_setzero_di (),
791 (__mmask8) -1);
794 extern __inline __m128i
795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
796 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
798 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
799 (__v2di) __W,
800 (__mmask8) __U);
803 extern __inline __m128i
804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
807 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
808 (__v2di)
809 _mm_setzero_di (),
810 (__mmask8) __U);
813 extern __inline __m128
814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815 _mm256_cvtepi64_ps (__m256i __A)
817 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
818 (__v4sf)
819 _mm_setzero_ps (),
820 (__mmask8) -1);
823 extern __inline __m128
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
827 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
828 (__v4sf) __W,
829 (__mmask8) __U);
832 extern __inline __m128
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
836 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
837 (__v4sf)
838 _mm_setzero_ps (),
839 (__mmask8) __U);
842 extern __inline __m128
843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844 _mm_cvtepi64_ps (__m128i __A)
846 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
847 (__v4sf)
848 _mm_setzero_ps (),
849 (__mmask8) -1);
852 extern __inline __m128
853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
854 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
856 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
857 (__v4sf) __W,
858 (__mmask8) __U);
861 extern __inline __m128
862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
865 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
866 (__v4sf)
867 _mm_setzero_ps (),
868 (__mmask8) __U);
871 extern __inline __m128
872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 _mm256_cvtepu64_ps (__m256i __A)
875 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
876 (__v4sf)
877 _mm_setzero_ps (),
878 (__mmask8) -1);
881 extern __inline __m128
882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
883 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
885 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
886 (__v4sf) __W,
887 (__mmask8) __U);
890 extern __inline __m128
891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
894 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
895 (__v4sf)
896 _mm_setzero_ps (),
897 (__mmask8) __U);
900 extern __inline __m128
901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 _mm_cvtepu64_ps (__m128i __A)
904 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
905 (__v4sf)
906 _mm_setzero_ps (),
907 (__mmask8) -1);
910 extern __inline __m128
911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
912 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
914 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
915 (__v4sf) __W,
916 (__mmask8) __U);
919 extern __inline __m128
920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
923 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
924 (__v4sf)
925 _mm_setzero_ps (),
926 (__mmask8) __U);
929 extern __inline __m256d
930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931 _mm256_cvtepi64_pd (__m256i __A)
933 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
934 (__v4df)
935 _mm256_setzero_pd (),
936 (__mmask8) -1);
939 extern __inline __m256d
940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
943 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
944 (__v4df) __W,
945 (__mmask8) __U);
948 extern __inline __m256d
949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
952 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
953 (__v4df)
954 _mm256_setzero_pd (),
955 (__mmask8) __U);
958 extern __inline __m128d
959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 _mm_cvtepi64_pd (__m128i __A)
962 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
963 (__v2df)
964 _mm_setzero_pd (),
965 (__mmask8) -1);
968 extern __inline __m128d
969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
970 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
972 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
973 (__v2df) __W,
974 (__mmask8) __U);
977 extern __inline __m128d
978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
979 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
981 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
982 (__v2df)
983 _mm_setzero_pd (),
984 (__mmask8) __U);
987 extern __inline __m256d
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 _mm256_cvtepu64_pd (__m256i __A)
991 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
992 (__v4df)
993 _mm256_setzero_pd (),
994 (__mmask8) -1);
997 extern __inline __m256d
998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
1001 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
1002 (__v4df) __W,
1003 (__mmask8) __U);
1006 extern __inline __m256d
1007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1008 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
1010 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
1011 (__v4df)
1012 _mm256_setzero_pd (),
1013 (__mmask8) __U);
1016 extern __inline __m256d
1017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1018 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
1019 __m256d __B)
1021 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1022 (__v4df) __B,
1023 (__v4df) __W,
1024 (__mmask8) __U);
1027 extern __inline __m256d
1028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1029 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
1031 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1032 (__v4df) __B,
1033 (__v4df)
1034 _mm256_setzero_pd (),
1035 (__mmask8) __U);
1038 extern __inline __m128d
1039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1042 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1043 (__v2df) __B,
1044 (__v2df) __W,
1045 (__mmask8) __U);
1048 extern __inline __m128d
1049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
1052 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1053 (__v2df) __B,
1054 (__v2df)
1055 _mm_setzero_pd (),
1056 (__mmask8) __U);
1059 extern __inline __m256
1060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1061 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1063 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1064 (__v8sf) __B,
1065 (__v8sf) __W,
1066 (__mmask8) __U);
1069 extern __inline __m256
1070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1071 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
1073 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1074 (__v8sf) __B,
1075 (__v8sf)
1076 _mm256_setzero_ps (),
1077 (__mmask8) __U);
1080 extern __inline __m128
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1084 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1085 (__v4sf) __B,
1086 (__v4sf) __W,
1087 (__mmask8) __U);
1090 extern __inline __m128
1091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
1094 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1095 (__v4sf) __B,
1096 (__v4sf)
1097 _mm_setzero_ps (),
1098 (__mmask8) __U);
1101 extern __inline __m128d
1102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm_cvtepu64_pd (__m128i __A)
1105 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1106 (__v2df)
1107 _mm_setzero_pd (),
1108 (__mmask8) -1);
1111 extern __inline __m128d
1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
1115 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1116 (__v2df) __W,
1117 (__mmask8) __U);
1120 extern __inline __m128d
1121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
1124 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1125 (__v2df)
1126 _mm_setzero_pd (),
1127 (__mmask8) __U);
1130 extern __inline __m256d
1131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
1133 __m256d __B)
1135 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1136 (__v4df) __B,
1137 (__v4df) __W,
1138 (__mmask8) __U);
1141 extern __inline __m256d
1142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1143 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
1145 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1146 (__v4df) __B,
1147 (__v4df)
1148 _mm256_setzero_pd (),
1149 (__mmask8) __U);
1152 extern __inline __m128d
1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1156 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1157 (__v2df) __B,
1158 (__v2df) __W,
1159 (__mmask8) __U);
1162 extern __inline __m128d
1163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1164 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
1166 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1167 (__v2df) __B,
1168 (__v2df)
1169 _mm_setzero_pd (),
1170 (__mmask8) __U);
1173 extern __inline __m256
1174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1175 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1177 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1178 (__v8sf) __B,
1179 (__v8sf) __W,
1180 (__mmask8) __U);
1183 extern __inline __m256
1184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1185 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
1187 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1188 (__v8sf) __B,
1189 (__v8sf)
1190 _mm256_setzero_ps (),
1191 (__mmask8) __U);
1194 extern __inline __m128
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1198 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1199 (__v4sf) __B,
1200 (__v4sf) __W,
1201 (__mmask8) __U);
1204 extern __inline __m128
1205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
1208 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1209 (__v4sf) __B,
1210 (__v4sf)
1211 _mm_setzero_ps (),
1212 (__mmask8) __U);
1215 extern __inline __m256d
1216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
1219 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1220 (__v4df) __B,
1221 (__v4df) __W,
1222 (__mmask8) __U);
1225 extern __inline __m256d
1226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
1229 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1230 (__v4df) __B,
1231 (__v4df)
1232 _mm256_setzero_pd (),
1233 (__mmask8) __U);
1236 extern __inline __m128d
1237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1240 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1241 (__v2df) __B,
1242 (__v2df) __W,
1243 (__mmask8) __U);
1246 extern __inline __m128d
1247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1248 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
1250 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1251 (__v2df) __B,
1252 (__v2df)
1253 _mm_setzero_pd (),
1254 (__mmask8) __U);
1257 extern __inline __m256
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1261 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1262 (__v8sf) __B,
1263 (__v8sf) __W,
1264 (__mmask8) __U);
1267 extern __inline __m256
1268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1269 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
1271 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1272 (__v8sf) __B,
1273 (__v8sf)
1274 _mm256_setzero_ps (),
1275 (__mmask8) __U);
1278 extern __inline __m128
1279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1280 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1282 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1283 (__v4sf) __B,
1284 (__v4sf) __W,
1285 (__mmask8) __U);
1288 extern __inline __m128
1289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1290 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
1292 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1293 (__v4sf) __B,
1294 (__v4sf)
1295 _mm_setzero_ps (),
1296 (__mmask8) __U);
1299 extern __inline __m128i
1300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1301 _mm_movm_epi32 (__mmask8 __A)
1303 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1306 extern __inline __m256i
1307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1308 _mm256_movm_epi32 (__mmask8 __A)
1310 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1313 extern __inline __m128i
1314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1315 _mm_movm_epi64 (__mmask8 __A)
1317 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1320 extern __inline __m256i
1321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1322 _mm256_movm_epi64 (__mmask8 __A)
1324 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1327 extern __inline __mmask8
1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1329 _mm_movepi32_mask (__m128i __A)
1331 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
1334 extern __inline __mmask8
1335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1336 _mm256_movepi32_mask (__m256i __A)
1338 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
1341 extern __inline __mmask8
1342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1343 _mm_movepi64_mask (__m128i __A)
1345 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1348 extern __inline __mmask8
1349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350 _mm256_movepi64_mask (__m256i __A)
1352 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1355 #ifdef __OPTIMIZE__
1356 extern __inline __m128d
1357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1358 _mm256_extractf64x2_pd (__m256d __A, const int __imm)
1360 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1361 __imm,
1362 (__v2df)
1363 _mm_setzero_pd (),
1364 (__mmask8) -
1368 extern __inline __m128d
1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
1371 const int __imm)
1373 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1374 __imm,
1375 (__v2df) __W,
1376 (__mmask8)
1377 __U);
1380 extern __inline __m128d
1381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1382 _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
1383 const int __imm)
1385 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1386 __imm,
1387 (__v2df)
1388 _mm_setzero_pd (),
1389 (__mmask8)
1390 __U);
1393 extern __inline __m128i
1394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1395 _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
1397 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1398 __imm,
1399 (__v2di)
1400 _mm_setzero_di (),
1401 (__mmask8) -
1405 extern __inline __m128i
1406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1407 _mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
1408 const int __imm)
1410 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1411 __imm,
1412 (__v2di) __W,
1413 (__mmask8)
1414 __U);
1417 extern __inline __m128i
1418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419 _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
1420 const int __imm)
1422 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1423 __imm,
1424 (__v2di)
1425 _mm_setzero_di (),
1426 (__mmask8)
1427 __U);
1430 extern __inline __m256d
1431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 _mm256_reduce_pd (__m256d __A, int __B)
1434 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1435 (__v4df)
1436 _mm256_setzero_pd (),
1437 (__mmask8) -1);
1440 extern __inline __m256d
1441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1442 _mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
1444 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1445 (__v4df) __W,
1446 (__mmask8) __U);
1449 extern __inline __m256d
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
1453 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1454 (__v4df)
1455 _mm256_setzero_pd (),
1456 (__mmask8) __U);
1459 extern __inline __m128d
1460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461 _mm_reduce_pd (__m128d __A, int __B)
1463 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1464 (__v2df)
1465 _mm_setzero_pd (),
1466 (__mmask8) -1);
1469 extern __inline __m128d
1470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471 _mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
1473 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1474 (__v2df) __W,
1475 (__mmask8) __U);
1478 extern __inline __m128d
1479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480 _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
1482 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1483 (__v2df)
1484 _mm_setzero_pd (),
1485 (__mmask8) __U);
1488 extern __inline __m256
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm256_reduce_ps (__m256 __A, int __B)
1492 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1493 (__v8sf)
1494 _mm256_setzero_ps (),
1495 (__mmask8) -1);
1498 extern __inline __m256
1499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1500 _mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
1502 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1503 (__v8sf) __W,
1504 (__mmask8) __U);
1507 extern __inline __m256
1508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
1511 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1512 (__v8sf)
1513 _mm256_setzero_ps (),
1514 (__mmask8) __U);
1517 extern __inline __m128
1518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1519 _mm_reduce_ps (__m128 __A, int __B)
1521 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1522 (__v4sf)
1523 _mm_setzero_ps (),
1524 (__mmask8) -1);
1527 extern __inline __m128
1528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1529 _mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
1531 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1532 (__v4sf) __W,
1533 (__mmask8) __U);
1536 extern __inline __m128
1537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
1540 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1541 (__v4sf)
1542 _mm_setzero_ps (),
1543 (__mmask8) __U);
1546 extern __inline __m256d
1547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1548 _mm256_range_pd (__m256d __A, __m256d __B, int __C)
1550 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1551 (__v4df) __B, __C,
1552 (__v4df)
1553 _mm256_setzero_pd (),
1554 (__mmask8) -1);
1557 extern __inline __m256d
1558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1559 _mm256_mask_range_pd (__m256d __W, __mmask8 __U,
1560 __m256d __A, __m256d __B, int __C)
1562 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1563 (__v4df) __B, __C,
1564 (__v4df) __W,
1565 (__mmask8) __U);
1568 extern __inline __m256d
1569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570 _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
1572 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1573 (__v4df) __B, __C,
1574 (__v4df)
1575 _mm256_setzero_pd (),
1576 (__mmask8) __U);
1579 extern __inline __m128d
1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1581 _mm_range_pd (__m128d __A, __m128d __B, int __C)
1583 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1584 (__v2df) __B, __C,
1585 (__v2df)
1586 _mm_setzero_pd (),
1587 (__mmask8) -1);
1590 extern __inline __m128d
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592 _mm_mask_range_pd (__m128d __W, __mmask8 __U,
1593 __m128d __A, __m128d __B, int __C)
1595 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1596 (__v2df) __B, __C,
1597 (__v2df) __W,
1598 (__mmask8) __U);
1601 extern __inline __m128d
1602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1603 _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1605 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1606 (__v2df) __B, __C,
1607 (__v2df)
1608 _mm_setzero_pd (),
1609 (__mmask8) __U);
1612 extern __inline __m256
1613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614 _mm256_range_ps (__m256 __A, __m256 __B, int __C)
1616 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1617 (__v8sf) __B, __C,
1618 (__v8sf)
1619 _mm256_setzero_ps (),
1620 (__mmask8) -1);
1623 extern __inline __m256
1624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1625 _mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
1626 int __C)
1628 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1629 (__v8sf) __B, __C,
1630 (__v8sf) __W,
1631 (__mmask8) __U);
1634 extern __inline __m256
1635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636 _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
1638 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1639 (__v8sf) __B, __C,
1640 (__v8sf)
1641 _mm256_setzero_ps (),
1642 (__mmask8) __U);
1645 extern __inline __m128
1646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1647 _mm_range_ps (__m128 __A, __m128 __B, int __C)
1649 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1650 (__v4sf) __B, __C,
1651 (__v4sf)
1652 _mm_setzero_ps (),
1653 (__mmask8) -1);
1656 extern __inline __m128
1657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1658 _mm_mask_range_ps (__m128 __W, __mmask8 __U,
1659 __m128 __A, __m128 __B, int __C)
1661 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1662 (__v4sf) __B, __C,
1663 (__v4sf) __W,
1664 (__mmask8) __U);
1667 extern __inline __m128
1668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1669 _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1671 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1672 (__v4sf) __B, __C,
1673 (__v4sf)
1674 _mm_setzero_ps (),
1675 (__mmask8) __U);
1678 extern __inline __mmask8
1679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1680 _mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
1681 const int __imm)
1683 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1684 __imm, __U);
1687 extern __inline __mmask8
1688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1689 _mm256_fpclass_pd_mask (__m256d __A, const int __imm)
1691 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1692 __imm,
1693 (__mmask8) -1);
1696 extern __inline __mmask8
1697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1698 _mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
1700 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1701 __imm, __U);
1704 extern __inline __mmask8
1705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1706 _mm256_fpclass_ps_mask (__m256 __A, const int __imm)
1708 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1709 __imm,
1710 (__mmask8) -1);
1713 extern __inline __mmask8
1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 _mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
1717 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1718 __imm, __U);
1721 extern __inline __mmask8
1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723 _mm_fpclass_pd_mask (__m128d __A, const int __imm)
1725 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1726 __imm,
1727 (__mmask8) -1);
1730 extern __inline __mmask8
1731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1732 _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1734 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1735 __imm, __U);
1738 extern __inline __mmask8
1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 _mm_fpclass_ps_mask (__m128 __A, const int __imm)
1742 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1743 __imm,
1744 (__mmask8) -1);
1747 extern __inline __m256i
1748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749 _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1751 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1752 (__v2di) __B,
1753 __imm,
1754 (__v4di)
1755 _mm256_setzero_si256 (),
1756 (__mmask8) -
1760 extern __inline __m256i
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1763 __m128i __B, const int __imm)
1765 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1766 (__v2di) __B,
1767 __imm,
1768 (__v4di) __W,
1769 (__mmask8)
1770 __U);
1773 extern __inline __m256i
1774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1775 _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1776 const int __imm)
1778 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1779 (__v2di) __B,
1780 __imm,
1781 (__v4di)
1782 _mm256_setzero_si256 (),
1783 (__mmask8)
1784 __U);
1787 extern __inline __m256d
1788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1789 _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1791 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1792 (__v2df) __B,
1793 __imm,
1794 (__v4df)
1795 _mm256_setzero_pd (),
1796 (__mmask8) -
1800 extern __inline __m256d
1801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1802 _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1803 __m128d __B, const int __imm)
1805 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1806 (__v2df) __B,
1807 __imm,
1808 (__v4df) __W,
1809 (__mmask8)
1810 __U);
1813 extern __inline __m256d
1814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815 _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1816 const int __imm)
1818 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1819 (__v2df) __B,
1820 __imm,
1821 (__v4df)
1822 _mm256_setzero_pd (),
1823 (__mmask8)
1824 __U);
1827 #else
/* Macro forms of the insert intrinsics, used when __OPTIMIZE__ is not
   defined; the lane selector C must be a compile-time constant.  */
#define _mm256_insertf64x2(X, Y, C)					\
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)_mm256_setzero_pd(),				\
    (__mmask8)-1))

#define _mm256_mask_insertf64x2(W, U, X, Y, C)				\
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)(W),						\
    (__mmask8)(U)))

#define _mm256_maskz_insertf64x2(U, X, Y, C)				\
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C),					\
    (__v4df)(__m256d)_mm256_setzero_pd(),				\
    (__mmask8)(U)))

#define _mm256_inserti64x2(X, Y, C)					\
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
    (__mmask8)-1))

#define _mm256_mask_inserti64x2(W, U, X, Y, C)				\
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)(W),						\
    (__mmask8)(U)))

#define _mm256_maskz_inserti64x2(U, X, Y, C)				\
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C),					\
    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
    (__mmask8)(U)))
/* Macro forms of the extract intrinsics (no-__OPTIMIZE__ path).  Uses
   the public _mm_setzero_si128() instead of the internal
   _mm_setzero_di() helper.  */
#define _mm256_extractf64x2_pd(X, C)					\
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, X, C)				\
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm256_maskz_extractf64x2_pd(U, X, C)				\
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))

#define _mm256_extracti64x2_epi64(X, C)					\
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128(), (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, X, C)			\
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_extracti64x2_epi64(U, X, C)			\
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128(), (__mmask8) (U)))
/* Macro forms of the VREDUCEPD/VREDUCEPS intrinsics (no-__OPTIMIZE__
   path); the reduction immediate B must be a compile-time constant.  */
#define _mm256_reduce_pd(A, B)						\
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B)				\
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B)					\
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_reduce_pd(A, B)						\
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B)					\
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B)					\
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_reduce_ps(A, B)						\
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B)				\
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B)					\
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_reduce_ps(A, B)						\
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B)					\
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B)					\
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
/* Macro forms of the VRANGEPD/VRANGEPS intrinsics (no-__OPTIMIZE__
   path); the selector immediate C must be a compile-time constant.  */
#define _mm256_range_pd(A, B, C)					\
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_maskz_range_pd(U, A, B, C)				\
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_range_pd(A, B, C)						\
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm256_range_ps(A, B, C)					\
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C)				\
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C)				\
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
    (__v8sf)(__m256)(B), (int)(C),					\
    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_range_ps(A, B, C)						\
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C)				\
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C)					\
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
    (__v4sf)(__m128)(B), (int)(C),					\
    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_mask_range_pd(W, U, A, B, C)				\
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
    (__v4df)(__m256d)(B), (int)(C),					\
    (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm_mask_range_pd(W, U, A, B, C)				\
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C)					\
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
    (__v2df)(__m128d)(B), (int)(C),					\
    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
/* Macro forms of the VFPCLASSPD/VFPCLASSPS intrinsics
   (no-__OPTIMIZE__ path); the class-selector immediate C must be a
   compile-time constant.  */
#define _mm256_mask_fpclass_pd_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm256_mask_fpclass_ps_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_pd_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_ps_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),	\
    (int) (C),(__mmask8)(u)))

#define _mm256_fpclass_pd_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X),	\
    (int) (C),(__mmask8)-1))

#define _mm256_fpclass_ps_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),	\
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_pd_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X),	\
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_ps_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),	\
    (int) (C),(__mmask8)-1))
2028 #endif
2030 #ifdef __DISABLE_AVX512VLDQ__
2031 #undef __DISABLE_AVX512VLDQ__
2032 #pragma GCC pop_options
2033 #endif /* __DISABLE_AVX512VLDQ__ */
2035 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */