/*
 * Provenance: Android SDK build-tools 25.0.2 RenderScript clang headers,
 * sdk/build-tools/25.0.2/renderscript/clang-include/avx512fintrin.h
 * (android_tools.git blob 8dcdc710d5c33d45a46e11c3e214306265b82e4f,
 * imported by the NDK roll that picked up the std::deque patch).
 */
1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
21 *===-----------------------------------------------------------------------===
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
/* Element-typed 512-bit vector aliases used internally by the intrinsics. */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Public 512-bit vector types exposed by the API. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write masks: one bit per lane (8 x 64-bit lanes, 16 x 32-bit lanes). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
52 /* Create vectors with repeated elements */
54 static __inline __m512i __DEFAULT_FN_ATTRS
55 _mm512_setzero_si512(void)
57 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
60 static __inline__ __m512d __DEFAULT_FN_ATTRS
61 _mm512_undefined_pd()
63 return (__m512d)__builtin_ia32_undef512();
66 static __inline__ __m512 __DEFAULT_FN_ATTRS
67 _mm512_undefined()
69 return (__m512)__builtin_ia32_undef512();
72 static __inline__ __m512 __DEFAULT_FN_ATTRS
73 _mm512_undefined_ps()
75 return (__m512)__builtin_ia32_undef512();
78 static __inline__ __m512i __DEFAULT_FN_ATTRS
79 _mm512_undefined_epi32()
81 return (__m512i)__builtin_ia32_undef512();
84 static __inline __m512i __DEFAULT_FN_ATTRS
85 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
87 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
88 (__v16si)
89 _mm512_setzero_si512 (),
90 __M);
93 static __inline __m512i __DEFAULT_FN_ATTRS
94 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
96 #ifdef __x86_64__
97 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
98 (__v8di)
99 _mm512_setzero_si512 (),
100 __M);
101 #else
102 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
103 (__v8di)
104 _mm512_setzero_si512 (),
105 __M);
106 #endif
109 static __inline __m512 __DEFAULT_FN_ATTRS
110 _mm512_setzero_ps(void)
112 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
113 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
115 static __inline __m512d __DEFAULT_FN_ATTRS
116 _mm512_setzero_pd(void)
118 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
121 static __inline __m512 __DEFAULT_FN_ATTRS
122 _mm512_set1_ps(float __w)
124 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
125 __w, __w, __w, __w, __w, __w, __w, __w };
128 static __inline __m512d __DEFAULT_FN_ATTRS
129 _mm512_set1_pd(double __w)
131 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
134 static __inline __m512i __DEFAULT_FN_ATTRS
135 _mm512_set1_epi32(int __s)
137 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
138 __s, __s, __s, __s, __s, __s, __s, __s };
141 static __inline __m512i __DEFAULT_FN_ATTRS
142 _mm512_set1_epi64(long long __d)
144 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
147 static __inline__ __m512 __DEFAULT_FN_ATTRS
148 _mm512_broadcastss_ps(__m128 __X)
150 float __f = __X[0];
151 return (__v16sf){ __f, __f, __f, __f,
152 __f, __f, __f, __f,
153 __f, __f, __f, __f,
154 __f, __f, __f, __f };
157 static __inline__ __m512d __DEFAULT_FN_ATTRS
158 _mm512_broadcastsd_pd(__m128d __X)
160 double __d = __X[0];
161 return (__v8df){ __d, __d, __d, __d,
162 __d, __d, __d, __d };
165 /* Cast between vector types */
167 static __inline __m512d __DEFAULT_FN_ATTRS
168 _mm512_castpd256_pd512(__m256d __a)
170 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
173 static __inline __m512 __DEFAULT_FN_ATTRS
174 _mm512_castps256_ps512(__m256 __a)
176 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
177 -1, -1, -1, -1, -1, -1, -1, -1);
180 static __inline __m128d __DEFAULT_FN_ATTRS
181 _mm512_castpd512_pd128(__m512d __a)
183 return __builtin_shufflevector(__a, __a, 0, 1);
186 static __inline __m128 __DEFAULT_FN_ATTRS
187 _mm512_castps512_ps128(__m512 __a)
189 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
192 /* Bitwise operators */
193 static __inline__ __m512i __DEFAULT_FN_ATTRS
194 _mm512_and_epi32(__m512i __a, __m512i __b)
196 return __a & __b;
199 static __inline__ __m512i __DEFAULT_FN_ATTRS
200 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
202 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
203 (__v16si) __b,
204 (__v16si) __src,
205 (__mmask16) __k);
207 static __inline__ __m512i __DEFAULT_FN_ATTRS
208 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
210 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
211 (__v16si) __b,
212 (__v16si)
213 _mm512_setzero_si512 (),
214 (__mmask16) __k);
217 static __inline__ __m512i __DEFAULT_FN_ATTRS
218 _mm512_and_epi64(__m512i __a, __m512i __b)
220 return __a & __b;
223 static __inline__ __m512i __DEFAULT_FN_ATTRS
224 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
226 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
227 (__v8di) __b,
228 (__v8di) __src,
229 (__mmask8) __k);
231 static __inline__ __m512i __DEFAULT_FN_ATTRS
232 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
234 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
235 (__v8di) __b,
236 (__v8di)
237 _mm512_setzero_si512 (),
238 (__mmask8) __k);
241 static __inline__ __m512i __DEFAULT_FN_ATTRS
242 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
244 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
245 (__v16si) __B,
246 (__v16si)
247 _mm512_setzero_si512 (),
248 (__mmask16) -1);
251 static __inline__ __m512i __DEFAULT_FN_ATTRS
252 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
254 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
255 (__v16si) __B,
256 (__v16si) __W,
257 (__mmask16) __U);
260 static __inline__ __m512i __DEFAULT_FN_ATTRS
261 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
263 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
264 (__v16si) __B,
265 (__v16si)
266 _mm512_setzero_si512 (),
267 (__mmask16) __U);
270 static __inline__ __m512i __DEFAULT_FN_ATTRS
271 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
273 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
274 (__v8di) __B,
275 (__v8di)
276 _mm512_setzero_si512 (),
277 (__mmask8) -1);
280 static __inline__ __m512i __DEFAULT_FN_ATTRS
281 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
283 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
284 (__v8di) __B,
285 (__v8di) __W, __U);
288 static __inline__ __m512i __DEFAULT_FN_ATTRS
289 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
291 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
292 (__v8di) __B,
293 (__v8di)
294 _mm512_setzero_pd (),
295 __U);
297 static __inline__ __m512i __DEFAULT_FN_ATTRS
298 _mm512_or_epi32(__m512i __a, __m512i __b)
300 return __a | __b;
303 static __inline__ __m512i __DEFAULT_FN_ATTRS
304 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
306 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
307 (__v16si) __b,
308 (__v16si) __src,
309 (__mmask16) __k);
311 static __inline__ __m512i __DEFAULT_FN_ATTRS
312 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
314 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
315 (__v16si) __b,
316 (__v16si)
317 _mm512_setzero_si512 (),
318 (__mmask16) __k);
321 static __inline__ __m512i __DEFAULT_FN_ATTRS
322 _mm512_or_epi64(__m512i __a, __m512i __b)
324 return __a | __b;
327 static __inline__ __m512i __DEFAULT_FN_ATTRS
328 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
330 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
331 (__v8di) __b,
332 (__v8di) __src,
333 (__mmask8) __k);
335 static __inline__ __m512i __DEFAULT_FN_ATTRS
336 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
338 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
339 (__v8di) __b,
340 (__v8di)
341 _mm512_setzero_si512 (),
342 (__mmask8) __k);
345 static __inline__ __m512i __DEFAULT_FN_ATTRS
346 _mm512_xor_epi32(__m512i __a, __m512i __b)
348 return __a ^ __b;
351 static __inline__ __m512i __DEFAULT_FN_ATTRS
352 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
354 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
355 (__v16si) __b,
356 (__v16si) __src,
357 (__mmask16) __k);
359 static __inline__ __m512i __DEFAULT_FN_ATTRS
360 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
362 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
363 (__v16si) __b,
364 (__v16si)
365 _mm512_setzero_si512 (),
366 (__mmask16) __k);
369 static __inline__ __m512i __DEFAULT_FN_ATTRS
370 _mm512_xor_epi64(__m512i __a, __m512i __b)
372 return __a ^ __b;
375 static __inline__ __m512i __DEFAULT_FN_ATTRS
376 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
378 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
379 (__v8di) __b,
380 (__v8di) __src,
381 (__mmask8) __k);
383 static __inline__ __m512i __DEFAULT_FN_ATTRS
384 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
386 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
387 (__v8di) __b,
388 (__v8di)
389 _mm512_setzero_si512 (),
390 (__mmask8) __k);
393 static __inline__ __m512i __DEFAULT_FN_ATTRS
394 _mm512_and_si512(__m512i __a, __m512i __b)
396 return __a & __b;
399 static __inline__ __m512i __DEFAULT_FN_ATTRS
400 _mm512_or_si512(__m512i __a, __m512i __b)
402 return __a | __b;
405 static __inline__ __m512i __DEFAULT_FN_ATTRS
406 _mm512_xor_si512(__m512i __a, __m512i __b)
408 return __a ^ __b;
410 /* Arithmetic */
412 static __inline __m512d __DEFAULT_FN_ATTRS
413 _mm512_add_pd(__m512d __a, __m512d __b)
415 return __a + __b;
418 static __inline __m512 __DEFAULT_FN_ATTRS
419 _mm512_add_ps(__m512 __a, __m512 __b)
421 return __a + __b;
424 static __inline __m512d __DEFAULT_FN_ATTRS
425 _mm512_mul_pd(__m512d __a, __m512d __b)
427 return __a * __b;
430 static __inline __m512 __DEFAULT_FN_ATTRS
431 _mm512_mul_ps(__m512 __a, __m512 __b)
433 return __a * __b;
436 static __inline __m512d __DEFAULT_FN_ATTRS
437 _mm512_sub_pd(__m512d __a, __m512d __b)
439 return __a - __b;
442 static __inline __m512 __DEFAULT_FN_ATTRS
443 _mm512_sub_ps(__m512 __a, __m512 __b)
445 return __a - __b;
448 static __inline__ __m512i __DEFAULT_FN_ATTRS
449 _mm512_add_epi64 (__m512i __A, __m512i __B)
451 return (__m512i) ((__v8di) __A + (__v8di) __B);
454 static __inline__ __m512i __DEFAULT_FN_ATTRS
455 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
457 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
458 (__v8di) __B,
459 (__v8di) __W,
460 (__mmask8) __U);
463 static __inline__ __m512i __DEFAULT_FN_ATTRS
464 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
466 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
467 (__v8di) __B,
468 (__v8di)
469 _mm512_setzero_si512 (),
470 (__mmask8) __U);
473 static __inline__ __m512i __DEFAULT_FN_ATTRS
474 _mm512_sub_epi64 (__m512i __A, __m512i __B)
476 return (__m512i) ((__v8di) __A - (__v8di) __B);
479 static __inline__ __m512i __DEFAULT_FN_ATTRS
480 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
482 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
483 (__v8di) __B,
484 (__v8di) __W,
485 (__mmask8) __U);
488 static __inline__ __m512i __DEFAULT_FN_ATTRS
489 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
491 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
492 (__v8di) __B,
493 (__v8di)
494 _mm512_setzero_si512 (),
495 (__mmask8) __U);
498 static __inline__ __m512i __DEFAULT_FN_ATTRS
499 _mm512_add_epi32 (__m512i __A, __m512i __B)
501 return (__m512i) ((__v16si) __A + (__v16si) __B);
504 static __inline__ __m512i __DEFAULT_FN_ATTRS
505 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
507 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
508 (__v16si) __B,
509 (__v16si) __W,
510 (__mmask16) __U);
513 static __inline__ __m512i __DEFAULT_FN_ATTRS
514 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
516 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
517 (__v16si) __B,
518 (__v16si)
519 _mm512_setzero_si512 (),
520 (__mmask16) __U);
523 static __inline__ __m512i __DEFAULT_FN_ATTRS
524 _mm512_sub_epi32 (__m512i __A, __m512i __B)
526 return (__m512i) ((__v16si) __A - (__v16si) __B);
529 static __inline__ __m512i __DEFAULT_FN_ATTRS
530 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
532 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
533 (__v16si) __B,
534 (__v16si) __W,
535 (__mmask16) __U);
538 static __inline__ __m512i __DEFAULT_FN_ATTRS
539 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
541 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
542 (__v16si) __B,
543 (__v16si)
544 _mm512_setzero_si512 (),
545 (__mmask16) __U);
548 static __inline__ __m512d __DEFAULT_FN_ATTRS
549 _mm512_max_pd(__m512d __A, __m512d __B)
551 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
552 (__v8df) __B,
553 (__v8df)
554 _mm512_setzero_pd (),
555 (__mmask8) -1,
556 _MM_FROUND_CUR_DIRECTION);
559 static __inline__ __m512 __DEFAULT_FN_ATTRS
560 _mm512_max_ps(__m512 __A, __m512 __B)
562 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
563 (__v16sf) __B,
564 (__v16sf)
565 _mm512_setzero_ps (),
566 (__mmask16) -1,
567 _MM_FROUND_CUR_DIRECTION);
570 static __inline__ __m128 __DEFAULT_FN_ATTRS
571 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
572 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
573 (__v4sf) __B,
574 (__v4sf) __W,
575 (__mmask8) __U,
576 _MM_FROUND_CUR_DIRECTION);
579 static __inline__ __m128 __DEFAULT_FN_ATTRS
580 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
581 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
582 (__v4sf) __B,
583 (__v4sf) _mm_setzero_ps (),
584 (__mmask8) __U,
585 _MM_FROUND_CUR_DIRECTION);
588 #define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
589 (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
590 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
592 #define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
593 (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
594 (__v4sf) __W, (__mmask8) __U,__R); })
596 #define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
597 (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
598 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
600 static __inline__ __m128d __DEFAULT_FN_ATTRS
601 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
602 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
603 (__v2df) __B,
604 (__v2df) __W,
605 (__mmask8) __U,
606 _MM_FROUND_CUR_DIRECTION);
609 static __inline__ __m128d __DEFAULT_FN_ATTRS
610 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
611 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
612 (__v2df) __B,
613 (__v2df) _mm_setzero_pd (),
614 (__mmask8) __U,
615 _MM_FROUND_CUR_DIRECTION);
618 #define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
619 (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
620 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
622 #define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
623 (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
624 (__v2df) __W, (__mmask8) __U,__R); })
626 #define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
627 (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
628 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
630 static __inline __m512i
631 __DEFAULT_FN_ATTRS
632 _mm512_max_epi32(__m512i __A, __m512i __B)
634 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
635 (__v16si) __B,
636 (__v16si)
637 _mm512_setzero_si512 (),
638 (__mmask16) -1);
641 static __inline __m512i __DEFAULT_FN_ATTRS
642 _mm512_max_epu32(__m512i __A, __m512i __B)
644 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
645 (__v16si) __B,
646 (__v16si)
647 _mm512_setzero_si512 (),
648 (__mmask16) -1);
651 static __inline __m512i __DEFAULT_FN_ATTRS
652 _mm512_max_epi64(__m512i __A, __m512i __B)
654 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
655 (__v8di) __B,
656 (__v8di)
657 _mm512_setzero_si512 (),
658 (__mmask8) -1);
661 static __inline __m512i __DEFAULT_FN_ATTRS
662 _mm512_max_epu64(__m512i __A, __m512i __B)
664 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
665 (__v8di) __B,
666 (__v8di)
667 _mm512_setzero_si512 (),
668 (__mmask8) -1);
671 static __inline__ __m512d __DEFAULT_FN_ATTRS
672 _mm512_min_pd(__m512d __A, __m512d __B)
674 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
675 (__v8df) __B,
676 (__v8df)
677 _mm512_setzero_pd (),
678 (__mmask8) -1,
679 _MM_FROUND_CUR_DIRECTION);
682 static __inline__ __m512 __DEFAULT_FN_ATTRS
683 _mm512_min_ps(__m512 __A, __m512 __B)
685 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
686 (__v16sf) __B,
687 (__v16sf)
688 _mm512_setzero_ps (),
689 (__mmask16) -1,
690 _MM_FROUND_CUR_DIRECTION);
693 static __inline__ __m128 __DEFAULT_FN_ATTRS
694 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
695 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
696 (__v4sf) __B,
697 (__v4sf) __W,
698 (__mmask8) __U,
699 _MM_FROUND_CUR_DIRECTION);
702 static __inline__ __m128 __DEFAULT_FN_ATTRS
703 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
704 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
705 (__v4sf) __B,
706 (__v4sf) _mm_setzero_ps (),
707 (__mmask8) __U,
708 _MM_FROUND_CUR_DIRECTION);
711 #define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
712 (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
713 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
715 #define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
716 (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
717 (__v4sf) __W, (__mmask8) __U,__R); })
719 #define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
720 (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
721 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
723 static __inline__ __m128d __DEFAULT_FN_ATTRS
724 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
725 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
726 (__v2df) __B,
727 (__v2df) __W,
728 (__mmask8) __U,
729 _MM_FROUND_CUR_DIRECTION);
732 static __inline__ __m128d __DEFAULT_FN_ATTRS
733 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
734 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
735 (__v2df) __B,
736 (__v2df) _mm_setzero_pd (),
737 (__mmask8) __U,
738 _MM_FROUND_CUR_DIRECTION);
741 #define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
742 (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
743 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
745 #define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
746 (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
747 (__v2df) __W, (__mmask8) __U,__R); })
749 #define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
750 (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
751 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
753 static __inline __m512i
754 __DEFAULT_FN_ATTRS
755 _mm512_min_epi32(__m512i __A, __m512i __B)
757 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
758 (__v16si) __B,
759 (__v16si)
760 _mm512_setzero_si512 (),
761 (__mmask16) -1);
764 static __inline __m512i __DEFAULT_FN_ATTRS
765 _mm512_min_epu32(__m512i __A, __m512i __B)
767 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
768 (__v16si) __B,
769 (__v16si)
770 _mm512_setzero_si512 (),
771 (__mmask16) -1);
774 static __inline __m512i __DEFAULT_FN_ATTRS
775 _mm512_min_epi64(__m512i __A, __m512i __B)
777 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
778 (__v8di) __B,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) -1);
784 static __inline __m512i __DEFAULT_FN_ATTRS
785 _mm512_min_epu64(__m512i __A, __m512i __B)
787 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
788 (__v8di) __B,
789 (__v8di)
790 _mm512_setzero_si512 (),
791 (__mmask8) -1);
794 static __inline __m512i __DEFAULT_FN_ATTRS
795 _mm512_mul_epi32(__m512i __X, __m512i __Y)
797 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
798 (__v16si) __Y,
799 (__v8di)
800 _mm512_setzero_si512 (),
801 (__mmask8) -1);
804 static __inline __m512i __DEFAULT_FN_ATTRS
805 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
807 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
808 (__v16si) __Y,
809 (__v8di) __W, __M);
812 static __inline __m512i __DEFAULT_FN_ATTRS
813 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
815 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
816 (__v16si) __Y,
817 (__v8di)
818 _mm512_setzero_si512 (),
819 __M);
822 static __inline __m512i __DEFAULT_FN_ATTRS
823 _mm512_mul_epu32(__m512i __X, __m512i __Y)
825 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
826 (__v16si) __Y,
827 (__v8di)
828 _mm512_setzero_si512 (),
829 (__mmask8) -1);
832 static __inline __m512i __DEFAULT_FN_ATTRS
833 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
835 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
836 (__v16si) __Y,
837 (__v8di) __W, __M);
840 static __inline __m512i __DEFAULT_FN_ATTRS
841 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
843 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
844 (__v16si) __Y,
845 (__v8di)
846 _mm512_setzero_si512 (),
847 __M);
850 static __inline __m512i __DEFAULT_FN_ATTRS
851 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
853 return (__m512i) ((__v16si) __A * (__v16si) __B);
856 static __inline __m512i __DEFAULT_FN_ATTRS
857 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
859 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
860 (__v16si) __B,
861 (__v16si)
862 _mm512_setzero_si512 (),
863 __M);
866 static __inline __m512i __DEFAULT_FN_ATTRS
867 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
869 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
870 (__v16si) __B,
871 (__v16si) __W, __M);
874 static __inline__ __m512d __DEFAULT_FN_ATTRS
875 _mm512_sqrt_pd(__m512d __a)
877 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
878 (__v8df) _mm512_setzero_pd (),
879 (__mmask8) -1,
880 _MM_FROUND_CUR_DIRECTION);
883 static __inline__ __m512 __DEFAULT_FN_ATTRS
884 _mm512_sqrt_ps(__m512 __a)
886 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
887 (__v16sf) _mm512_setzero_ps (),
888 (__mmask16) -1,
889 _MM_FROUND_CUR_DIRECTION);
892 static __inline__ __m512d __DEFAULT_FN_ATTRS
893 _mm512_rsqrt14_pd(__m512d __A)
895 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
896 (__v8df)
897 _mm512_setzero_pd (),
898 (__mmask8) -1);}
900 static __inline__ __m512 __DEFAULT_FN_ATTRS
901 _mm512_rsqrt14_ps(__m512 __A)
903 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
904 (__v16sf)
905 _mm512_setzero_ps (),
906 (__mmask16) -1);
909 static __inline__ __m128 __DEFAULT_FN_ATTRS
910 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
912 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
913 (__v4sf) __B,
914 (__v4sf)
915 _mm_setzero_ps (),
916 (__mmask8) -1);
919 static __inline__ __m128d __DEFAULT_FN_ATTRS
920 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
922 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
923 (__v2df) __B,
924 (__v2df)
925 _mm_setzero_pd (),
926 (__mmask8) -1);
929 static __inline__ __m512d __DEFAULT_FN_ATTRS
930 _mm512_rcp14_pd(__m512d __A)
932 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
933 (__v8df)
934 _mm512_setzero_pd (),
935 (__mmask8) -1);
938 static __inline__ __m512 __DEFAULT_FN_ATTRS
939 _mm512_rcp14_ps(__m512 __A)
941 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
942 (__v16sf)
943 _mm512_setzero_ps (),
944 (__mmask16) -1);
946 static __inline__ __m128 __DEFAULT_FN_ATTRS
947 _mm_rcp14_ss(__m128 __A, __m128 __B)
949 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
950 (__v4sf) __B,
951 (__v4sf)
952 _mm_setzero_ps (),
953 (__mmask8) -1);
956 static __inline__ __m128d __DEFAULT_FN_ATTRS
957 _mm_rcp14_sd(__m128d __A, __m128d __B)
959 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
960 (__v2df) __B,
961 (__v2df)
962 _mm_setzero_pd (),
963 (__mmask8) -1);
966 static __inline __m512 __DEFAULT_FN_ATTRS
967 _mm512_floor_ps(__m512 __A)
969 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
970 _MM_FROUND_FLOOR,
971 (__v16sf) __A, -1,
972 _MM_FROUND_CUR_DIRECTION);
975 static __inline __m512d __DEFAULT_FN_ATTRS
976 _mm512_floor_pd(__m512d __A)
978 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
979 _MM_FROUND_FLOOR,
980 (__v8df) __A, -1,
981 _MM_FROUND_CUR_DIRECTION);
984 static __inline __m512 __DEFAULT_FN_ATTRS
985 _mm512_ceil_ps(__m512 __A)
987 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
988 _MM_FROUND_CEIL,
989 (__v16sf) __A, -1,
990 _MM_FROUND_CUR_DIRECTION);
993 static __inline __m512d __DEFAULT_FN_ATTRS
994 _mm512_ceil_pd(__m512d __A)
996 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
997 _MM_FROUND_CEIL,
998 (__v8df) __A, -1,
999 _MM_FROUND_CUR_DIRECTION);
1002 static __inline __m512i __DEFAULT_FN_ATTRS
1003 _mm512_abs_epi64(__m512i __A)
1005 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1006 (__v8di)
1007 _mm512_setzero_si512 (),
1008 (__mmask8) -1);
1011 static __inline __m512i __DEFAULT_FN_ATTRS
1012 _mm512_abs_epi32(__m512i __A)
1014 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1015 (__v16si)
1016 _mm512_setzero_si512 (),
1017 (__mmask16) -1);
1020 static __inline__ __m128 __DEFAULT_FN_ATTRS
1021 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1022 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1023 (__v4sf) __B,
1024 (__v4sf) __W,
1025 (__mmask8) __U,
1026 _MM_FROUND_CUR_DIRECTION);
1029 static __inline__ __m128 __DEFAULT_FN_ATTRS
1030 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1031 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1032 (__v4sf) __B,
1033 (__v4sf) _mm_setzero_ps (),
1034 (__mmask8) __U,
1035 _MM_FROUND_CUR_DIRECTION);
1038 #define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
1039 (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
1040 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
1042 #define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
1043 (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
1044 (__v4sf) __W, (__mmask8) __U,__R); })
1046 #define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
1047 (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
1048 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
1050 static __inline__ __m128d __DEFAULT_FN_ATTRS
1051 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1052 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1053 (__v2df) __B,
1054 (__v2df) __W,
1055 (__mmask8) __U,
1056 _MM_FROUND_CUR_DIRECTION);
1059 static __inline__ __m128d __DEFAULT_FN_ATTRS
1060 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1061 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1062 (__v2df) __B,
1063 (__v2df) _mm_setzero_pd (),
1064 (__mmask8) __U,
1065 _MM_FROUND_CUR_DIRECTION);
1067 #define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
1068 (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
1069 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
1071 #define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
1072 (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
1073 (__v2df) __W, (__mmask8) __U,__R); })
1075 #define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
1076 (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
1077 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
1079 static __inline__ __m512d __DEFAULT_FN_ATTRS
1080 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1081 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1082 (__v8df) __B,
1083 (__v8df) __W,
1084 (__mmask8) __U,
1085 _MM_FROUND_CUR_DIRECTION);
1088 static __inline__ __m512d __DEFAULT_FN_ATTRS
1089 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1090 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1091 (__v8df) __B,
1092 (__v8df) _mm512_setzero_pd (),
1093 (__mmask8) __U,
1094 _MM_FROUND_CUR_DIRECTION);
1097 static __inline__ __m512 __DEFAULT_FN_ATTRS
1098 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1099 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1100 (__v16sf) __B,
1101 (__v16sf) __W,
1102 (__mmask16) __U,
1103 _MM_FROUND_CUR_DIRECTION);
1106 static __inline__ __m512 __DEFAULT_FN_ATTRS
1107 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1108 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1109 (__v16sf) __B,
1110 (__v16sf) _mm512_setzero_ps (),
1111 (__mmask16) __U,
1112 _MM_FROUND_CUR_DIRECTION);
/* Packed 512-bit add with an explicit rounding mode __R
   (unmasked / merge-masked / zero-masked variants). */
#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
               (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })

#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \
               (__v8df) __W, (__mmask8) __U, __R); })

#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
               (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); })

#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
              (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); })

#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
              (__v16sf) __W, (__mmask16)__U, __R); })

#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
              (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })
1139 static __inline__ __m128 __DEFAULT_FN_ATTRS
1140 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1141 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1142 (__v4sf) __B,
1143 (__v4sf) __W,
1144 (__mmask8) __U,
1145 _MM_FROUND_CUR_DIRECTION);
1148 static __inline__ __m128 __DEFAULT_FN_ATTRS
1149 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1150 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1151 (__v4sf) __B,
1152 (__v4sf) _mm_setzero_ps (),
1153 (__mmask8) __U,
1154 _MM_FROUND_CUR_DIRECTION);
1156 #define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
1157 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
1158 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
1160 #define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
1161 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
1162 (__v4sf) __W, (__mmask8) __U,__R); })
1164 #define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
1165 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
1166 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
1168 static __inline__ __m128d __DEFAULT_FN_ATTRS
1169 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1170 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1171 (__v2df) __B,
1172 (__v2df) __W,
1173 (__mmask8) __U,
1174 _MM_FROUND_CUR_DIRECTION);
1177 static __inline__ __m128d __DEFAULT_FN_ATTRS
1178 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1179 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1180 (__v2df) __B,
1181 (__v2df) _mm_setzero_pd (),
1182 (__mmask8) __U,
1183 _MM_FROUND_CUR_DIRECTION);
1186 #define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
1187 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
1188 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
1190 #define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
1191 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
1192 (__v2df) __W, (__mmask8) __U,__R); })
1194 #define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
1195 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
1196 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
1198 static __inline__ __m512d __DEFAULT_FN_ATTRS
1199 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1200 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1201 (__v8df) __B,
1202 (__v8df) __W,
1203 (__mmask8) __U,
1204 _MM_FROUND_CUR_DIRECTION);
1207 static __inline__ __m512d __DEFAULT_FN_ATTRS
1208 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1209 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
1210 (__v8df) __B,
1211 (__v8df)
1212 _mm512_setzero_pd (),
1213 (__mmask8) __U,
1214 _MM_FROUND_CUR_DIRECTION);
1217 static __inline__ __m512 __DEFAULT_FN_ATTRS
1218 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1219 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1220 (__v16sf) __B,
1221 (__v16sf) __W,
1222 (__mmask16) __U,
1223 _MM_FROUND_CUR_DIRECTION);
1226 static __inline__ __m512 __DEFAULT_FN_ATTRS
1227 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1228 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
1229 (__v16sf) __B,
1230 (__v16sf)
1231 _mm512_setzero_ps (),
1232 (__mmask16) __U,
1233 _MM_FROUND_CUR_DIRECTION);
/* Packed 512-bit subtract with explicit rounding mode __R. */
#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })

#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) __W, (__mmask8) __U, __R); })

#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})

#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})

/* BUGFIX: the two macros below previously ended with '); });' — the stray
   semicolon after the statement-expression broke any use of the macro inside
   a larger expression (e.g. as a function argument). It has been removed. */
#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) __W, (__mmask16) __U, __R); })

#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
1260 static __inline__ __m128 __DEFAULT_FN_ATTRS
1261 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1262 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
1263 (__v4sf) __B,
1264 (__v4sf) __W,
1265 (__mmask8) __U,
1266 _MM_FROUND_CUR_DIRECTION);
1269 static __inline__ __m128 __DEFAULT_FN_ATTRS
1270 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1271 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
1272 (__v4sf) __B,
1273 (__v4sf) _mm_setzero_ps (),
1274 (__mmask8) __U,
1275 _MM_FROUND_CUR_DIRECTION);
1277 #define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
1278 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
1279 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
1281 #define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
1282 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
1283 (__v4sf) __W, (__mmask8) __U,__R); })
1285 #define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
1286 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
1287 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
1289 static __inline__ __m128d __DEFAULT_FN_ATTRS
1290 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1291 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
1292 (__v2df) __B,
1293 (__v2df) __W,
1294 (__mmask8) __U,
1295 _MM_FROUND_CUR_DIRECTION);
1298 static __inline__ __m128d __DEFAULT_FN_ATTRS
1299 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1300 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
1301 (__v2df) __B,
1302 (__v2df) _mm_setzero_pd (),
1303 (__mmask8) __U,
1304 _MM_FROUND_CUR_DIRECTION);
1307 #define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
1308 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
1309 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
1311 #define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
1312 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
1313 (__v2df) __W, (__mmask8) __U,__R); })
1315 #define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
1316 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
1317 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
1319 static __inline__ __m512d __DEFAULT_FN_ATTRS
1320 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1321 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1322 (__v8df) __B,
1323 (__v8df) __W,
1324 (__mmask8) __U,
1325 _MM_FROUND_CUR_DIRECTION);
1328 static __inline__ __m512d __DEFAULT_FN_ATTRS
1329 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1330 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
1331 (__v8df) __B,
1332 (__v8df)
1333 _mm512_setzero_pd (),
1334 (__mmask8) __U,
1335 _MM_FROUND_CUR_DIRECTION);
1338 static __inline__ __m512 __DEFAULT_FN_ATTRS
1339 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1340 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1341 (__v16sf) __B,
1342 (__v16sf) __W,
1343 (__mmask16) __U,
1344 _MM_FROUND_CUR_DIRECTION);
1347 static __inline__ __m512 __DEFAULT_FN_ATTRS
1348 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1349 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
1350 (__v16sf) __B,
1351 (__v16sf)
1352 _mm512_setzero_ps (),
1353 (__mmask16) __U,
1354 _MM_FROUND_CUR_DIRECTION);
/* Packed 512-bit multiply with explicit rounding mode __R. */
#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })

#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) __W, (__mmask8) __U, __R); })

#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})

#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})

/* BUGFIX: stray ';' after '})' removed from the two macros below; it broke
   expression-context uses of these macros. */
#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) __W, (__mmask16) __U, __R); })

#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
1381 static __inline__ __m128 __DEFAULT_FN_ATTRS
1382 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1383 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
1384 (__v4sf) __B,
1385 (__v4sf) __W,
1386 (__mmask8) __U,
1387 _MM_FROUND_CUR_DIRECTION);
1390 static __inline__ __m128 __DEFAULT_FN_ATTRS
1391 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1392 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
1393 (__v4sf) __B,
1394 (__v4sf) _mm_setzero_ps (),
1395 (__mmask8) __U,
1396 _MM_FROUND_CUR_DIRECTION);
1399 #define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
1400 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
1401 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
1403 #define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
1404 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
1405 (__v4sf) __W, (__mmask8) __U,__R); })
1407 #define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
1408 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
1409 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
1411 static __inline__ __m128d __DEFAULT_FN_ATTRS
1412 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1413 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
1414 (__v2df) __B,
1415 (__v2df) __W,
1416 (__mmask8) __U,
1417 _MM_FROUND_CUR_DIRECTION);
1420 static __inline__ __m128d __DEFAULT_FN_ATTRS
1421 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1422 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
1423 (__v2df) __B,
1424 (__v2df) _mm_setzero_pd (),
1425 (__mmask8) __U,
1426 _MM_FROUND_CUR_DIRECTION);
1429 #define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
1430 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
1431 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
1433 #define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
1434 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
1435 (__v2df) __W, (__mmask8) __U,__R); })
1437 #define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
1438 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
1439 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
1441 static __inline__ __m512d __DEFAULT_FN_ATTRS
1442 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1443 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1444 (__v8df) __B,
1445 (__v8df) __W,
1446 (__mmask8) __U,
1447 _MM_FROUND_CUR_DIRECTION);
1450 static __inline__ __m512d __DEFAULT_FN_ATTRS
1451 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1452 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
1453 (__v8df) __B,
1454 (__v8df)
1455 _mm512_setzero_pd (),
1456 (__mmask8) __U,
1457 _MM_FROUND_CUR_DIRECTION);
1460 static __inline__ __m512 __DEFAULT_FN_ATTRS
1461 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1462 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1463 (__v16sf) __B,
1464 (__v16sf) __W,
1465 (__mmask16) __U,
1466 _MM_FROUND_CUR_DIRECTION);
1469 static __inline__ __m512 __DEFAULT_FN_ATTRS
1470 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1471 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
1472 (__v16sf) __B,
1473 (__v16sf)
1474 _mm512_setzero_ps (),
1475 (__mmask16) __U,
1476 _MM_FROUND_CUR_DIRECTION);
/* Packed 512-bit divide with explicit rounding mode __R. */
#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\
             (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })

#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) __W, (__mmask8) __U, __R); })

#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
  (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
             (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})

#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})

/* BUGFIX: stray ';' after '})' removed from the two macros below; it broke
   expression-context uses of these macros. */
#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) __W, (__mmask16) __U, __R); })

#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
  (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);})
/* Round packed elements to the precision selected by immediate B, using the
   current rounding direction. The source A is also used as the (fully-enabled)
   pass-through operand. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
                                         -1, _MM_FROUND_CUR_DIRECTION); })

#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
                                          -1, _MM_FROUND_CUR_DIRECTION); })
/* Fused multiply-add family on packed doubles with explicit rounding (R).
   fmsub/fnmadd/fnmsub are expressed by negating the C or A operand of the
   fmadd builtin. _mask merges with A, _mask3 merges with C, _maskz zeroes. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), (__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), (__v8df) (C), \
                                             (__mmask8) (U), (R)); })

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), -(__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
                                             (__v8df) (B), -(__v8df) (C), \
                                             (__mmask8) (U), (R)); })

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
                                              (__v8df) (B), -(__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
                                             (__v8df) (B), (__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
                                              (__v8df) (B), (__v8df) (C), \
                                              (__mmask8) (U), (R)); })

#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
                                             (__v8df) (B), -(__v8df) (C), \
                                             (__mmask8) -1, (R)); })

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
                                              (__v8df) (B), -(__v8df) (C), \
                                              (__mmask8) (U), (R)); })
1583 static __inline__ __m512d __DEFAULT_FN_ATTRS
1584 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1586 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1587 (__v8df) __B,
1588 (__v8df) __C,
1589 (__mmask8) -1,
1590 _MM_FROUND_CUR_DIRECTION);
1593 static __inline__ __m512d __DEFAULT_FN_ATTRS
1594 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1596 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1597 (__v8df) __B,
1598 (__v8df) __C,
1599 (__mmask8) __U,
1600 _MM_FROUND_CUR_DIRECTION);
1603 static __inline__ __m512d __DEFAULT_FN_ATTRS
1604 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1606 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
1607 (__v8df) __B,
1608 (__v8df) __C,
1609 (__mmask8) __U,
1610 _MM_FROUND_CUR_DIRECTION);
1613 static __inline__ __m512d __DEFAULT_FN_ATTRS
1614 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1616 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1617 (__v8df) __B,
1618 (__v8df) __C,
1619 (__mmask8) __U,
1620 _MM_FROUND_CUR_DIRECTION);
1623 static __inline__ __m512d __DEFAULT_FN_ATTRS
1624 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1626 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1627 (__v8df) __B,
1628 -(__v8df) __C,
1629 (__mmask8) -1,
1630 _MM_FROUND_CUR_DIRECTION);
1633 static __inline__ __m512d __DEFAULT_FN_ATTRS
1634 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1636 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
1637 (__v8df) __B,
1638 -(__v8df) __C,
1639 (__mmask8) __U,
1640 _MM_FROUND_CUR_DIRECTION);
1643 static __inline__ __m512d __DEFAULT_FN_ATTRS
1644 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1646 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
1647 (__v8df) __B,
1648 -(__v8df) __C,
1649 (__mmask8) __U,
1650 _MM_FROUND_CUR_DIRECTION);
1653 static __inline__ __m512d __DEFAULT_FN_ATTRS
1654 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
1656 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1657 (__v8df) __B,
1658 (__v8df) __C,
1659 (__mmask8) -1,
1660 _MM_FROUND_CUR_DIRECTION);
1663 static __inline__ __m512d __DEFAULT_FN_ATTRS
1664 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1666 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
1667 (__v8df) __B,
1668 (__v8df) __C,
1669 (__mmask8) __U,
1670 _MM_FROUND_CUR_DIRECTION);
1673 static __inline__ __m512d __DEFAULT_FN_ATTRS
1674 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1676 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1677 (__v8df) __B,
1678 (__v8df) __C,
1679 (__mmask8) __U,
1680 _MM_FROUND_CUR_DIRECTION);
1683 static __inline__ __m512d __DEFAULT_FN_ATTRS
1684 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
1686 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
1687 (__v8df) __B,
1688 -(__v8df) __C,
1689 (__mmask8) -1,
1690 _MM_FROUND_CUR_DIRECTION);
1693 static __inline__ __m512d __DEFAULT_FN_ATTRS
1694 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1696 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
1697 (__v8df) __B,
1698 -(__v8df) __C,
1699 (__mmask8) __U,
1700 _MM_FROUND_CUR_DIRECTION);
/* FMA family on packed floats with explicit rounding (R); same operand
   negation scheme as the _pd variants above. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), (__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), (__v16sf) (C), \
                                            (__mmask16) (U), (R)); })

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), -(__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
                                            (__v16sf) (B), -(__v16sf) (C), \
                                            (__mmask16) (U), (R)); })

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
                                             (__v16sf) (B), -(__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
                                            (__v16sf) (B), (__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
                                             (__v16sf) (B), (__v16sf) (C), \
                                             (__mmask16) (U), (R)); })

#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
                                            (__v16sf) (B), -(__v16sf) (C), \
                                            (__mmask16) -1, (R)); })

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
                                             (__v16sf) (B), -(__v16sf) (C), \
                                             (__mmask16) (U), (R)); })
1775 static __inline__ __m512 __DEFAULT_FN_ATTRS
1776 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1778 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1779 (__v16sf) __B,
1780 (__v16sf) __C,
1781 (__mmask16) -1,
1782 _MM_FROUND_CUR_DIRECTION);
1785 static __inline__ __m512 __DEFAULT_FN_ATTRS
1786 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1788 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1789 (__v16sf) __B,
1790 (__v16sf) __C,
1791 (__mmask16) __U,
1792 _MM_FROUND_CUR_DIRECTION);
1795 static __inline__ __m512 __DEFAULT_FN_ATTRS
1796 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1798 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
1799 (__v16sf) __B,
1800 (__v16sf) __C,
1801 (__mmask16) __U,
1802 _MM_FROUND_CUR_DIRECTION);
1805 static __inline__ __m512 __DEFAULT_FN_ATTRS
1806 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1808 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1809 (__v16sf) __B,
1810 (__v16sf) __C,
1811 (__mmask16) __U,
1812 _MM_FROUND_CUR_DIRECTION);
1815 static __inline__ __m512 __DEFAULT_FN_ATTRS
1816 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1818 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1819 (__v16sf) __B,
1820 -(__v16sf) __C,
1821 (__mmask16) -1,
1822 _MM_FROUND_CUR_DIRECTION);
1825 static __inline__ __m512 __DEFAULT_FN_ATTRS
1826 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
1828 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
1829 (__v16sf) __B,
1830 -(__v16sf) __C,
1831 (__mmask16) __U,
1832 _MM_FROUND_CUR_DIRECTION);
1835 static __inline__ __m512 __DEFAULT_FN_ATTRS
1836 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1838 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
1839 (__v16sf) __B,
1840 -(__v16sf) __C,
1841 (__mmask16) __U,
1842 _MM_FROUND_CUR_DIRECTION);
1845 static __inline__ __m512 __DEFAULT_FN_ATTRS
1846 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
1848 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1849 (__v16sf) __B,
1850 (__v16sf) __C,
1851 (__mmask16) -1,
1852 _MM_FROUND_CUR_DIRECTION);
1855 static __inline__ __m512 __DEFAULT_FN_ATTRS
1856 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
1858 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
1859 (__v16sf) __B,
1860 (__v16sf) __C,
1861 (__mmask16) __U,
1862 _MM_FROUND_CUR_DIRECTION);
1865 static __inline__ __m512 __DEFAULT_FN_ATTRS
1866 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1868 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1869 (__v16sf) __B,
1870 (__v16sf) __C,
1871 (__mmask16) __U,
1872 _MM_FROUND_CUR_DIRECTION);
1875 static __inline__ __m512 __DEFAULT_FN_ATTRS
1876 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
1878 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
1879 (__v16sf) __B,
1880 -(__v16sf) __C,
1881 (__mmask16) -1,
1882 _MM_FROUND_CUR_DIRECTION);
1885 static __inline__ __m512 __DEFAULT_FN_ATTRS
1886 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
1888 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
1889 (__v16sf) __B,
1890 -(__v16sf) __C,
1891 (__mmask16) __U,
1892 _MM_FROUND_CUR_DIRECTION);
/* Fused multiply with alternating add/sub on packed doubles, explicit
   rounding (R). fmsubadd is encoded by negating the C operand. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), (__v8df) (C), \
                                                (__mmask8) -1, (R)); })

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), (__v8df) (C), \
                                                (__mmask8) (U), (R)); })

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
                                                 (__v8df) (B), (__v8df) (C), \
                                                 (__mmask8) (U), (R)); })

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
                                                 (__v8df) (B), (__v8df) (C), \
                                                 (__mmask8) (U), (R)); })

#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), -(__v8df) (C), \
                                                (__mmask8) -1, (R)); })

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
                                                (__v8df) (B), -(__v8df) (C), \
                                                (__mmask8) (U), (R)); })

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
                                                 (__v8df) (B), -(__v8df) (C), \
                                                 (__mmask8) (U), (R)); })
1937 static __inline__ __m512d __DEFAULT_FN_ATTRS
1938 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
1940 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1941 (__v8df) __B,
1942 (__v8df) __C,
1943 (__mmask8) -1,
1944 _MM_FROUND_CUR_DIRECTION);
1947 static __inline__ __m512d __DEFAULT_FN_ATTRS
1948 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1950 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1951 (__v8df) __B,
1952 (__v8df) __C,
1953 (__mmask8) __U,
1954 _MM_FROUND_CUR_DIRECTION);
1957 static __inline__ __m512d __DEFAULT_FN_ATTRS
1958 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
1960 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
1961 (__v8df) __B,
1962 (__v8df) __C,
1963 (__mmask8) __U,
1964 _MM_FROUND_CUR_DIRECTION);
1967 static __inline__ __m512d __DEFAULT_FN_ATTRS
1968 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
1970 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
1971 (__v8df) __B,
1972 (__v8df) __C,
1973 (__mmask8) __U,
1974 _MM_FROUND_CUR_DIRECTION);
1977 static __inline__ __m512d __DEFAULT_FN_ATTRS
1978 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
1980 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1981 (__v8df) __B,
1982 -(__v8df) __C,
1983 (__mmask8) -1,
1984 _MM_FROUND_CUR_DIRECTION);
1987 static __inline__ __m512d __DEFAULT_FN_ATTRS
1988 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
1990 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
1991 (__v8df) __B,
1992 -(__v8df) __C,
1993 (__mmask8) __U,
1994 _MM_FROUND_CUR_DIRECTION);
1997 static __inline__ __m512d __DEFAULT_FN_ATTRS
1998 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2000 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2001 (__v8df) __B,
2002 -(__v8df) __C,
2003 (__mmask8) __U,
2004 _MM_FROUND_CUR_DIRECTION);
/* fmaddsub / fmsubadd on packed floats with explicit rounding (R). */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), (__v16sf) (C), \
                                               (__mmask16) -1, (R)); })

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), (__v16sf) (C), \
                                               (__mmask16) (U), (R)); })

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
                                                (__v16sf) (B), (__v16sf) (C), \
                                                (__mmask16) (U), (R)); })

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
                                                (__v16sf) (B), (__v16sf) (C), \
                                                (__mmask16) (U), (R)); })

#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), -(__v16sf) (C), \
                                               (__mmask16) -1, (R)); })

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
                                               (__v16sf) (B), -(__v16sf) (C), \
                                               (__mmask16) (U), (R)); })

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
                                                (__v16sf) (B), -(__v16sf) (C), \
                                                (__mmask16) (U), (R)); })
2049 static __inline__ __m512 __DEFAULT_FN_ATTRS
2050 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2052 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2053 (__v16sf) __B,
2054 (__v16sf) __C,
2055 (__mmask16) -1,
2056 _MM_FROUND_CUR_DIRECTION);
2059 static __inline__ __m512 __DEFAULT_FN_ATTRS
2060 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2062 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2063 (__v16sf) __B,
2064 (__v16sf) __C,
2065 (__mmask16) __U,
2066 _MM_FROUND_CUR_DIRECTION);
2069 static __inline__ __m512 __DEFAULT_FN_ATTRS
2070 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2072 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2073 (__v16sf) __B,
2074 (__v16sf) __C,
2075 (__mmask16) __U,
2076 _MM_FROUND_CUR_DIRECTION);
2079 static __inline__ __m512 __DEFAULT_FN_ATTRS
2080 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2082 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2083 (__v16sf) __B,
2084 (__v16sf) __C,
2085 (__mmask16) __U,
2086 _MM_FROUND_CUR_DIRECTION);
2089 static __inline__ __m512 __DEFAULT_FN_ATTRS
2090 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2092 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2093 (__v16sf) __B,
2094 -(__v16sf) __C,
2095 (__mmask16) -1,
2096 _MM_FROUND_CUR_DIRECTION);
2099 static __inline__ __m512 __DEFAULT_FN_ATTRS
2100 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2102 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2103 (__v16sf) __B,
2104 -(__v16sf) __C,
2105 (__mmask16) __U,
2106 _MM_FROUND_CUR_DIRECTION);
2109 static __inline__ __m512 __DEFAULT_FN_ATTRS
2110 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2112 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2113 (__v16sf) __B,
2114 -(__v16sf) __C,
2115 (__mmask16) __U,
2116 _MM_FROUND_CUR_DIRECTION);
2119 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
2120 (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
2121 (__v8df) (B), (__v8df) (C), \
2122 (__mmask8) (U), (R)); })
2125 static __inline__ __m512d __DEFAULT_FN_ATTRS
2126 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2128 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2129 (__v8df) __B,
2130 (__v8df) __C,
2131 (__mmask8) __U,
2132 _MM_FROUND_CUR_DIRECTION);
2135 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
2136 (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
2137 (__v16sf) (B), (__v16sf) (C), \
2138 (__mmask16) (U), (R)); })
2141 static __inline__ __m512 __DEFAULT_FN_ATTRS
2142 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2144 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2145 (__v16sf) __B,
2146 (__v16sf) __C,
2147 (__mmask16) __U,
2148 _MM_FROUND_CUR_DIRECTION);
2151 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
2152 (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
2153 (__v8df) (B), (__v8df) (C), \
2154 (__mmask8) (U), (R)); })
2157 static __inline__ __m512d __DEFAULT_FN_ATTRS
2158 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2160 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df) __C,
2163 (__mmask8) __U,
2164 _MM_FROUND_CUR_DIRECTION);
2167 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
2168 (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
2169 (__v16sf) (B), (__v16sf) (C), \
2170 (__mmask16) (U), (R)); })
2173 static __inline__ __m512 __DEFAULT_FN_ATTRS
2174 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2176 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2177 (__v16sf) __B,
2178 (__v16sf) __C,
2179 (__mmask16) __U,
2180 _MM_FROUND_CUR_DIRECTION);
2183 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
2184 (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
2185 (__v8df) (B), (__v8df) (C), \
2186 (__mmask8) (U), (R)); })
2189 static __inline__ __m512d __DEFAULT_FN_ATTRS
2190 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2192 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2193 (__v8df) __B,
2194 (__v8df) __C,
2195 (__mmask8) __U,
2196 _MM_FROUND_CUR_DIRECTION);
2199 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
2200 (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
2201 (__v16sf) (B), (__v16sf) (C), \
2202 (__mmask16) (U), (R)); })
2205 static __inline__ __m512 __DEFAULT_FN_ATTRS
2206 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2208 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
2209 (__v16sf) __B,
2210 (__v16sf) __C,
2211 (__mmask16) __U,
2212 _MM_FROUND_CUR_DIRECTION);
2215 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
2216 (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
2217 (__v8df) (B), (__v8df) (C), \
2218 (__mmask8) (U), (R)); })
2221 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
2222 (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
2223 (__v8df) (B), (__v8df) (C), \
2224 (__mmask8) (U), (R)); })
2227 static __inline__ __m512d __DEFAULT_FN_ATTRS
2228 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2230 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
2231 (__v8df) __B,
2232 (__v8df) __C,
2233 (__mmask8) __U,
2234 _MM_FROUND_CUR_DIRECTION);
2237 static __inline__ __m512d __DEFAULT_FN_ATTRS
2238 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2240 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
2241 (__v8df) __B,
2242 (__v8df) __C,
2243 (__mmask8) __U,
2244 _MM_FROUND_CUR_DIRECTION);
2247 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
2248 (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
2249 (__v16sf) (B), (__v16sf) (C), \
2250 (__mmask16) (U), (R)); })
2253 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
2254 (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
2255 (__v16sf) (B), (__v16sf) (C), \
2256 (__mmask16) (U), (R)); })
2259 static __inline__ __m512 __DEFAULT_FN_ATTRS
2260 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2262 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
2263 (__v16sf) __B,
2264 (__v16sf) __C,
2265 (__mmask16) __U,
2266 _MM_FROUND_CUR_DIRECTION);
2269 static __inline__ __m512 __DEFAULT_FN_ATTRS
2270 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2272 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
2273 (__v16sf) __B,
2274 (__v16sf) __C,
2275 (__mmask16) __U,
2276 _MM_FROUND_CUR_DIRECTION);
2281 /* Vector permutations */
2283 static __inline __m512i __DEFAULT_FN_ATTRS
2284 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
2286 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
2287 /* idx */ ,
2288 (__v16si) __A,
2289 (__v16si) __B,
2290 (__mmask16) -1);
2292 static __inline __m512i __DEFAULT_FN_ATTRS
2293 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
2295 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
2296 /* idx */ ,
2297 (__v8di) __A,
2298 (__v8di) __B,
2299 (__mmask8) -1);
2302 static __inline __m512d __DEFAULT_FN_ATTRS
2303 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
2305 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
2306 /* idx */ ,
2307 (__v8df) __A,
2308 (__v8df) __B,
2309 (__mmask8) -1);
2311 static __inline __m512 __DEFAULT_FN_ATTRS
2312 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
2314 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
2315 /* idx */ ,
2316 (__v16sf) __A,
2317 (__v16sf) __B,
2318 (__mmask16) -1);
/* Concatenate A and B, then shift right by I whole elements (64- or
   32-bit), returning the low 512 bits of the result. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), \
                                         (I), (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })

#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), \
                                         (I), (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })
/* Vector Extract */

/* Extract a 256-bit group of 4 doubles (I selects low/high half) or a
   128-bit group of 4 floats (I in 0..3).  Fixed the pass-through zero
   vector for the pd form: it used (__v4df)_mm256_setzero_si256(), a
   bitcast from the integer-vector zero; use the matching FP zero. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
      (__m256d) \
      __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
                                       (I), \
                                       (__v4df)_mm256_setzero_pd(), \
                                       (__mmask8) -1); })

#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
      (__m128) \
      __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
                                       (I), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8) -1); })
2349 /* Vector Blend */
2351 static __inline __m512d __DEFAULT_FN_ATTRS
2352 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
2354 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
2355 (__v8df) __W,
2356 (__mmask8) __U);
2359 static __inline __m512 __DEFAULT_FN_ATTRS
2360 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
2362 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
2363 (__v16sf) __W,
2364 (__mmask16) __U);
2367 static __inline __m512i __DEFAULT_FN_ATTRS
2368 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
2370 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
2371 (__v8di) __W,
2372 (__mmask8) __U);
2375 static __inline __m512i __DEFAULT_FN_ATTRS
2376 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
2378 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
2379 (__v16si) __W,
2380 (__mmask16) __U);
/* Compare */

/* Floating-point compares: P is the comparison predicate immediate, R the
   rounding/SAE mode.  The non-round forms forward the current direction. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (P), (__mmask16)-1, (R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (P), (__mmask16)(U), (R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (P), (__mmask8)-1, (R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), \
                                         (P), (__mmask8)(U), (R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
2417 /* Conversion */
2419 static __inline __m512i __DEFAULT_FN_ATTRS
2420 _mm512_cvttps_epu32(__m512 __A)
2422 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
2423 (__v16si)
2424 _mm512_setzero_si512 (),
2425 (__mmask16) -1,
2426 _MM_FROUND_CUR_DIRECTION);
2429 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
2430 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
2431 (__v16sf)_mm512_setzero_ps(), \
2432 (__mmask16)-1, (R)); })
2434 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
2435 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
2436 (__v16sf)_mm512_setzero_ps(), \
2437 (__mmask16)-1, (R)); })
2439 static __inline __m512d __DEFAULT_FN_ATTRS
2440 _mm512_cvtepi32_pd(__m256i __A)
2442 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
2443 (__v8df)
2444 _mm512_setzero_pd (),
2445 (__mmask8) -1);
2448 static __inline __m512d __DEFAULT_FN_ATTRS
2449 _mm512_cvtepu32_pd(__m256i __A)
2451 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
2452 (__v8df)
2453 _mm512_setzero_pd (),
2454 (__mmask8) -1);
2457 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
2458 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
2459 (__v8sf)_mm256_setzero_ps(), \
2460 (__mmask8)-1, (R)); })
2462 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
2463 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
2464 (__v16hi)_mm256_setzero_si256(), \
2465 -1); })
2467 static __inline __m512 __DEFAULT_FN_ATTRS
2468 _mm512_cvtph_ps(__m256i __A)
2470 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
2471 (__v16sf)
2472 _mm512_setzero_ps (),
2473 (__mmask16) -1,
2474 _MM_FROUND_CUR_DIRECTION);
2477 static __inline __m512i __DEFAULT_FN_ATTRS
2478 _mm512_cvttps_epi32(__m512 __a)
2480 return (__m512i)
2481 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
2482 (__v16si) _mm512_setzero_si512 (),
2483 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
2486 static __inline __m256i __DEFAULT_FN_ATTRS
2487 _mm512_cvttpd_epi32(__m512d __a)
2489 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
2490 (__v8si)_mm256_setzero_si256(),
2491 (__mmask8) -1,
2492 _MM_FROUND_CUR_DIRECTION);
2495 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
2496 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
2497 (__v8si)_mm256_setzero_si256(), \
2498 (__mmask8)-1, (R)); })
2500 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
2501 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
2502 (__v16si)_mm512_setzero_si512(), \
2503 (__mmask16)-1, (R)); })
2505 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
2506 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
2507 (__v16si)_mm512_setzero_si512(), \
2508 (__mmask16)-1, (R)); })
2510 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
2511 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
2512 (__v8si)_mm256_setzero_si256(), \
2513 (__mmask8)-1, (R)); })
2515 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
2516 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
2517 (__v16si)_mm512_setzero_si512(), \
2518 (__mmask16)-1, (R)); })
2520 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
2521 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
2522 (__v8si)_mm256_setzero_si256(), \
2523 (__mmask8) -1, (R)); })
2525 /* Unpack and Interleave */
2526 static __inline __m512d __DEFAULT_FN_ATTRS
2527 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
2529 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
2532 static __inline __m512d __DEFAULT_FN_ATTRS
2533 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
2535 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
2538 static __inline __m512 __DEFAULT_FN_ATTRS
2539 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
2541 return __builtin_shufflevector(__a, __b,
2542 2, 18, 3, 19,
2543 2+4, 18+4, 3+4, 19+4,
2544 2+8, 18+8, 3+8, 19+8,
2545 2+12, 18+12, 3+12, 19+12);
2548 static __inline __m512 __DEFAULT_FN_ATTRS
2549 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
2551 return __builtin_shufflevector(__a, __b,
2552 0, 16, 1, 17,
2553 0+4, 16+4, 1+4, 17+4,
2554 0+8, 16+8, 1+8, 17+8,
2555 0+12, 16+12, 1+12, 17+12);
2558 /* Bit Test */
2560 static __inline __mmask16 __DEFAULT_FN_ATTRS
2561 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
2563 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
2564 (__v16si) __B,
2565 (__mmask16) -1);
2568 static __inline __mmask8 __DEFAULT_FN_ATTRS
2569 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
2571 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
2572 (__v8di) __B,
2573 (__mmask8) -1);
2576 /* SIMD load ops */
2578 static __inline __m512i __DEFAULT_FN_ATTRS
2579 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
2581 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
2582 (__v16si)
2583 _mm512_setzero_si512 (),
2584 (__mmask16) __U);
2587 static __inline __m512i __DEFAULT_FN_ATTRS
2588 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
2590 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
2591 (__v8di)
2592 _mm512_setzero_si512 (),
2593 (__mmask8) __U);
2596 static __inline __m512 __DEFAULT_FN_ATTRS
2597 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
2599 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
2600 (__v16sf)
2601 _mm512_setzero_ps (),
2602 (__mmask16) __U);
2605 static __inline __m512d __DEFAULT_FN_ATTRS
2606 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
2608 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
2609 (__v8df)
2610 _mm512_setzero_pd (),
2611 (__mmask8) __U);
2614 static __inline __m512 __DEFAULT_FN_ATTRS
2615 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
2617 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
2618 (__v16sf)
2619 _mm512_setzero_ps (),
2620 (__mmask16) __U);
2623 static __inline __m512d __DEFAULT_FN_ATTRS
2624 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
2626 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
2627 (__v8df)
2628 _mm512_setzero_pd (),
2629 (__mmask8) __U);
2632 static __inline __m512d __DEFAULT_FN_ATTRS
2633 _mm512_loadu_pd(double const *__p)
2635 struct __loadu_pd {
2636 __m512d __v;
2637 } __attribute__((__packed__, __may_alias__));
2638 return ((struct __loadu_pd*)__p)->__v;
2641 static __inline __m512 __DEFAULT_FN_ATTRS
2642 _mm512_loadu_ps(float const *__p)
2644 struct __loadu_ps {
2645 __m512 __v;
2646 } __attribute__((__packed__, __may_alias__));
2647 return ((struct __loadu_ps*)__p)->__v;
2650 static __inline __m512 __DEFAULT_FN_ATTRS
2651 _mm512_load_ps(double const *__p)
2653 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
2654 (__v16sf)
2655 _mm512_setzero_ps (),
2656 (__mmask16) -1);
2659 static __inline __m512d __DEFAULT_FN_ATTRS
2660 _mm512_load_pd(float const *__p)
2662 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
2663 (__v8df)
2664 _mm512_setzero_pd (),
2665 (__mmask8) -1);
2668 /* SIMD store ops */
2670 static __inline void __DEFAULT_FN_ATTRS
2671 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
2673 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
2674 (__mmask8) __U);
2677 static __inline void __DEFAULT_FN_ATTRS
2678 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
2680 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
2681 (__mmask16) __U);
2684 static __inline void __DEFAULT_FN_ATTRS
2685 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
2687 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2690 static __inline void __DEFAULT_FN_ATTRS
2691 _mm512_storeu_pd(void *__P, __m512d __A)
2693 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
2696 static __inline void __DEFAULT_FN_ATTRS
2697 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
2699 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
2700 (__mmask16) __U);
2703 static __inline void __DEFAULT_FN_ATTRS
2704 _mm512_storeu_ps(void *__P, __m512 __A)
2706 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
2709 static __inline void __DEFAULT_FN_ATTRS
2710 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
2712 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
2715 static __inline void __DEFAULT_FN_ATTRS
2716 _mm512_store_pd(void *__P, __m512d __A)
2718 *(__m512d*)__P = __A;
2721 static __inline void __DEFAULT_FN_ATTRS
2722 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
2724 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
2725 (__mmask16) __U);
2728 static __inline void __DEFAULT_FN_ATTRS
2729 _mm512_store_ps(void *__P, __m512 __A)
2731 *(__m512*)__P = __A;
2734 /* Mask ops */
2736 static __inline __mmask16 __DEFAULT_FN_ATTRS
2737 _mm512_knot(__mmask16 __M)
2739 return __builtin_ia32_knothi(__M);
2742 /* Integer compare */
2744 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2745 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
2746 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2747 (__mmask16)-1);
2750 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2751 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2752 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
2753 __u);
2756 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2757 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
2758 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2759 (__mmask16)-1);
2762 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2763 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2764 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
2765 __u);
2768 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2769 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2770 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2771 __u);
2774 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2775 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
2776 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
2777 (__mmask8)-1);
2780 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2781 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
2782 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2783 (__mmask8)-1);
2786 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2787 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2788 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
2789 __u);
2792 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2793 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
2794 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2795 (__mmask16)-1);
2798 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2799 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2800 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2801 __u);
2804 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2805 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
2806 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2807 (__mmask16)-1);
2810 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2811 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2812 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
2813 __u);
2816 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2817 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
2818 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2819 (__mmask8)-1);
2822 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2823 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2824 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2825 __u);
2828 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2829 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
2830 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2831 (__mmask8)-1);
2834 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2835 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2836 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
2837 __u);
2840 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2841 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
2842 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2843 (__mmask16)-1);
2846 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2847 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2848 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
2849 __u);
2852 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2853 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
2854 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2855 (__mmask16)-1);
2858 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2859 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2860 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
2861 __u);
2864 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2865 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2866 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2867 __u);
2870 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2871 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
2872 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
2873 (__mmask8)-1);
2876 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2877 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
2878 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2879 (__mmask8)-1);
2882 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2883 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2884 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
2885 __u);
2888 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2889 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
2890 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2891 (__mmask16)-1);
2894 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2895 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2896 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2897 __u);
2900 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2901 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
2902 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2903 (__mmask16)-1);
2906 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2907 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2908 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
2909 __u);
2912 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2913 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
2914 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2915 (__mmask8)-1);
2918 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2919 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2920 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2921 __u);
2924 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2925 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
2926 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2927 (__mmask8)-1);
2930 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2931 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2932 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
2933 __u);
2936 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2937 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
2938 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2939 (__mmask16)-1);
2942 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2943 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2944 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2945 __u);
2948 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2949 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
2950 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2951 (__mmask16)-1);
2954 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2955 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2956 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
2957 __u);
2960 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2961 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
2962 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2963 (__mmask8)-1);
2966 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2967 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2968 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2969 __u);
2972 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2973 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
2974 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2975 (__mmask8)-1);
2978 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
2979 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
2980 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
2981 __u);
2984 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2985 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
2986 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2987 (__mmask16)-1);
2990 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2991 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
2992 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2993 __u);
2996 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
2997 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
2998 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
2999 (__mmask16)-1);
3002 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3003 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
3004 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
3005 __u);
3008 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3009 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
3010 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3011 (__mmask8)-1);
3014 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3015 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3016 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3017 __u);
3020 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3021 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
3022 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3023 (__mmask8)-1);
3026 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3027 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
3028 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
3029 __u);
3032 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
3033 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
3034 (__v16si)(__m512i)(b), (p), \
3035 (__mmask16)-1); })
3037 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
3038 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
3039 (__v16si)(__m512i)(b), (p), \
3040 (__mmask16)-1); })
3042 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
3043 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
3044 (__v8di)(__m512i)(b), (p), \
3045 (__mmask8)-1); })
3047 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
3048 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
3049 (__v8di)(__m512i)(b), (p), \
3050 (__mmask8)-1); })
3052 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
3053 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
3054 (__v16si)(__m512i)(b), (p), \
3055 (__mmask16)(m)); })
3057 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
3058 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
3059 (__v16si)(__m512i)(b), (p), \
3060 (__mmask16)(m)); })
3062 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
3063 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
3064 (__v8di)(__m512i)(b), (p), \
3065 (__mmask8)(m)); })
3067 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
3068 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
3069 (__v8di)(__m512i)(b), (p), \
3070 (__mmask8)(m)); })
3072 #undef __DEFAULT_FN_ATTRS
3074 #endif // __AVX512FINTRIN_H