Add GCC support to ENQCMD.
[official-gcc.git] / gcc / config / i386 / avx512fintrin.h
blobe35eedb9268d1b8c2ad4b5128281edba6e4e1eb1
1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

/* Opmask register types: 8- and 16-bit write masks.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
63 extern __inline __mmask16
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_int2mask (int __M)
67 return (__mmask16) __M;
70 extern __inline int
71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72 _mm512_mask2int (__mmask16 __M)
74 return (int) __M;
77 extern __inline __m512i
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
87 /* Create the vector [A B C D E F G H I J K L M N O P]. */
88 extern __inline __m512i
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
100 extern __inline __m512i
101 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
102 _mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
103 short __q27, short __q26, short __q25, short __q24,
104 short __q23, short __q22, short __q21, short __q20,
105 short __q19, short __q18, short __q17, short __q16,
106 short __q15, short __q14, short __q13, short __q12,
107 short __q11, short __q10, short __q09, short __q08,
108 short __q07, short __q06, short __q05, short __q04,
109 short __q03, short __q02, short __q01, short __q00)
111 return __extension__ (__m512i)(__v32hi){
112 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
113 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
114 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
115 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
119 extern __inline __m512i
120 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
121 _mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
122 char __q59, char __q58, char __q57, char __q56,
123 char __q55, char __q54, char __q53, char __q52,
124 char __q51, char __q50, char __q49, char __q48,
125 char __q47, char __q46, char __q45, char __q44,
126 char __q43, char __q42, char __q41, char __q40,
127 char __q39, char __q38, char __q37, char __q36,
128 char __q35, char __q34, char __q33, char __q32,
129 char __q31, char __q30, char __q29, char __q28,
130 char __q27, char __q26, char __q25, char __q24,
131 char __q23, char __q22, char __q21, char __q20,
132 char __q19, char __q18, char __q17, char __q16,
133 char __q15, char __q14, char __q13, char __q12,
134 char __q11, char __q10, char __q09, char __q08,
135 char __q07, char __q06, char __q05, char __q04,
136 char __q03, char __q02, char __q01, char __q00)
138 return __extension__ (__m512i)(__v64qi){
139 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
140 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
141 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
142 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
143 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
144 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
145 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
146 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
150 extern __inline __m512d
151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
152 _mm512_set_pd (double __A, double __B, double __C, double __D,
153 double __E, double __F, double __G, double __H)
155 return __extension__ (__m512d)
156 { __H, __G, __F, __E, __D, __C, __B, __A };
159 extern __inline __m512
160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161 _mm512_set_ps (float __A, float __B, float __C, float __D,
162 float __E, float __F, float __G, float __H,
163 float __I, float __J, float __K, float __L,
164 float __M, float __N, float __O, float __P)
166 return __extension__ (__m512)
167 { __P, __O, __N, __M, __L, __K, __J, __I,
168 __H, __G, __F, __E, __D, __C, __B, __A };
/* "setr" variants take elements in memory (reversed) order and simply
   forward to the corresponding "set" intrinsic with reversed args.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,	   \
			  e8,e9,e10,e11,e12,e13,e14,e15)   \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
184 extern __inline __m512
185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186 _mm512_undefined_ps (void)
188 __m512 __Y = __Y;
189 return __Y;
192 #define _mm512_undefined _mm512_undefined_ps
194 extern __inline __m512d
195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
196 _mm512_undefined_pd (void)
198 __m512d __Y = __Y;
199 return __Y;
202 extern __inline __m512i
203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
204 _mm512_undefined_epi32 (void)
206 __m512i __Y = __Y;
207 return __Y;
210 #define _mm512_undefined_si512 _mm512_undefined_epi32
212 extern __inline __m512i
213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
214 _mm512_set1_epi8 (char __A)
216 return __extension__ (__m512i)(__v64qi)
217 { __A, __A, __A, __A, __A, __A, __A, __A,
218 __A, __A, __A, __A, __A, __A, __A, __A,
219 __A, __A, __A, __A, __A, __A, __A, __A,
220 __A, __A, __A, __A, __A, __A, __A, __A,
221 __A, __A, __A, __A, __A, __A, __A, __A,
222 __A, __A, __A, __A, __A, __A, __A, __A,
223 __A, __A, __A, __A, __A, __A, __A, __A,
224 __A, __A, __A, __A, __A, __A, __A, __A };
227 extern __inline __m512i
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm512_set1_epi16 (short __A)
231 return __extension__ (__m512i)(__v32hi)
232 { __A, __A, __A, __A, __A, __A, __A, __A,
233 __A, __A, __A, __A, __A, __A, __A, __A,
234 __A, __A, __A, __A, __A, __A, __A, __A,
235 __A, __A, __A, __A, __A, __A, __A, __A };
238 extern __inline __m512d
239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240 _mm512_set1_pd (double __A)
242 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
243 (__v2df) { __A, },
244 (__v8df)
245 _mm512_undefined_pd (),
246 (__mmask8) -1);
249 extern __inline __m512
250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251 _mm512_set1_ps (float __A)
253 return (__m512) __builtin_ia32_broadcastss512 (__extension__
254 (__v4sf) { __A, },
255 (__v16sf)
256 _mm512_undefined_ps (),
257 (__mmask16) -1);
260 /* Create the vector [A B C D A B C D A B C D A B C D]. */
261 extern __inline __m512i
262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
263 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
265 return __extension__ (__m512i)(__v16si)
266 { __D, __C, __B, __A, __D, __C, __B, __A,
267 __D, __C, __B, __A, __D, __C, __B, __A };
270 extern __inline __m512i
271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
272 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
273 long long __D)
275 return __extension__ (__m512i) (__v8di)
276 { __D, __C, __B, __A, __D, __C, __B, __A };
279 extern __inline __m512d
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 _mm512_set4_pd (double __A, double __B, double __C, double __D)
283 return __extension__ (__m512d)
284 { __D, __C, __B, __A, __D, __C, __B, __A };
287 extern __inline __m512
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm512_set4_ps (float __A, float __B, float __C, float __D)
291 return __extension__ (__m512)
292 { __D, __C, __B, __A, __D, __C, __B, __A,
293 __D, __C, __B, __A, __D, __C, __B, __A };
/* "setr4" variants: elements in memory order, forwarded reversed.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps(e3,e2,e1,e0)
308 extern __inline __m512
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 _mm512_setzero_ps (void)
312 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
313 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
316 extern __inline __m512
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm512_setzero (void)
320 return _mm512_setzero_ps ();
323 extern __inline __m512d
324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
325 _mm512_setzero_pd (void)
327 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
330 extern __inline __m512i
331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
332 _mm512_setzero_epi32 (void)
334 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
337 extern __inline __m512i
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm512_setzero_si512 (void)
341 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
344 extern __inline __m512d
345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
346 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
348 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
349 (__v8df) __W,
350 (__mmask8) __U);
353 extern __inline __m512d
354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
357 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
358 (__v8df)
359 _mm512_setzero_pd (),
360 (__mmask8) __U);
363 extern __inline __m512
364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
365 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
367 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
368 (__v16sf) __W,
369 (__mmask16) __U);
372 extern __inline __m512
373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
376 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
377 (__v16sf)
378 _mm512_setzero_ps (),
379 (__mmask16) __U);
382 extern __inline __m512d
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm512_load_pd (void const *__P)
386 return *(__m512d *) __P;
389 extern __inline __m512d
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
393 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
394 (__v8df) __W,
395 (__mmask8) __U);
398 extern __inline __m512d
399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
400 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
402 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
403 (__v8df)
404 _mm512_setzero_pd (),
405 (__mmask8) __U);
408 extern __inline void
409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
410 _mm512_store_pd (void *__P, __m512d __A)
412 *(__m512d *) __P = __A;
415 extern __inline void
416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
419 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
420 (__mmask8) __U);
423 extern __inline __m512
424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425 _mm512_load_ps (void const *__P)
427 return *(__m512 *) __P;
430 extern __inline __m512
431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
432 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
434 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
435 (__v16sf) __W,
436 (__mmask16) __U);
439 extern __inline __m512
440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
441 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
443 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
444 (__v16sf)
445 _mm512_setzero_ps (),
446 (__mmask16) __U);
449 extern __inline void
450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451 _mm512_store_ps (void *__P, __m512 __A)
453 *(__m512 *) __P = __A;
456 extern __inline void
457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
460 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
461 (__mmask16) __U);
464 extern __inline __m512i
465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
466 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
468 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
469 (__v8di) __W,
470 (__mmask8) __U);
473 extern __inline __m512i
474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
477 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
478 (__v8di)
479 _mm512_setzero_si512 (),
480 (__mmask8) __U);
483 extern __inline __m512i
484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
485 _mm512_load_epi64 (void const *__P)
487 return *(__m512i *) __P;
490 extern __inline __m512i
491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
492 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
494 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
495 (__v8di) __W,
496 (__mmask8) __U);
499 extern __inline __m512i
500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
503 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
504 (__v8di)
505 _mm512_setzero_si512 (),
506 (__mmask8) __U);
509 extern __inline void
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_store_epi64 (void *__P, __m512i __A)
513 *(__m512i *) __P = __A;
516 extern __inline void
517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
518 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
520 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
521 (__mmask8) __U);
524 extern __inline __m512i
525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
526 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
528 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
529 (__v16si) __W,
530 (__mmask16) __U);
533 extern __inline __m512i
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
537 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
538 (__v16si)
539 _mm512_setzero_si512 (),
540 (__mmask16) __U);
543 extern __inline __m512i
544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
545 _mm512_load_si512 (void const *__P)
547 return *(__m512i *) __P;
550 extern __inline __m512i
551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
552 _mm512_load_epi32 (void const *__P)
554 return *(__m512i *) __P;
557 extern __inline __m512i
558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
559 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
561 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
562 (__v16si) __W,
563 (__mmask16) __U);
566 extern __inline __m512i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
570 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
571 (__v16si)
572 _mm512_setzero_si512 (),
573 (__mmask16) __U);
576 extern __inline void
577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
578 _mm512_store_si512 (void *__P, __m512i __A)
580 *(__m512i *) __P = __A;
583 extern __inline void
584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585 _mm512_store_epi32 (void *__P, __m512i __A)
587 *(__m512i *) __P = __A;
590 extern __inline void
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
594 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
595 (__mmask16) __U);
598 extern __inline __m512i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
602 return (__m512i) ((__v16su) __A * (__v16su) __B);
605 extern __inline __m512i
606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
609 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
610 (__v16si) __B,
611 (__v16si)
612 _mm512_setzero_si512 (),
613 __M);
616 extern __inline __m512i
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
620 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
621 (__v16si) __B,
622 (__v16si) __W, __M);
625 extern __inline __m512i
626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 _mm512_mullox_epi64 (__m512i __A, __m512i __B)
629 return (__m512i) ((__v8du) __A * (__v8du) __B);
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
636 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
639 extern __inline __m512i
640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
641 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
643 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
644 (__v16si) __Y,
645 (__v16si)
646 _mm512_undefined_epi32 (),
647 (__mmask16) -1);
650 extern __inline __m512i
651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
652 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
654 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
655 (__v16si) __Y,
656 (__v16si) __W,
657 (__mmask16) __U);
660 extern __inline __m512i
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
664 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
665 (__v16si) __Y,
666 (__v16si)
667 _mm512_setzero_si512 (),
668 (__mmask16) __U);
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
675 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
676 (__v16si) __Y,
677 (__v16si)
678 _mm512_undefined_epi32 (),
679 (__mmask16) -1);
682 extern __inline __m512i
683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
684 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
686 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
687 (__v16si) __Y,
688 (__v16si) __W,
689 (__mmask16) __U);
692 extern __inline __m512i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
696 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
697 (__v16si) __Y,
698 (__v16si)
699 _mm512_setzero_si512 (),
700 (__mmask16) __U);
703 extern __inline __m512i
704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
705 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
707 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
708 (__v16si) __Y,
709 (__v16si)
710 _mm512_undefined_epi32 (),
711 (__mmask16) -1);
714 extern __inline __m512i
715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
716 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
718 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
719 (__v16si) __Y,
720 (__v16si) __W,
721 (__mmask16) __U);
724 extern __inline __m512i
725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
726 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
728 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
729 (__v16si) __Y,
730 (__v16si)
731 _mm512_setzero_si512 (),
732 (__mmask16) __U);
735 extern __inline __m512i
736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737 _mm512_add_epi64 (__m512i __A, __m512i __B)
739 return (__m512i) ((__v8du) __A + (__v8du) __B);
742 extern __inline __m512i
743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
746 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
747 (__v8di) __B,
748 (__v8di) __W,
749 (__mmask8) __U);
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
757 (__v8di) __B,
758 (__v8di)
759 _mm512_setzero_si512 (),
760 (__mmask8) __U);
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_sub_epi64 (__m512i __A, __m512i __B)
767 return (__m512i) ((__v8du) __A - (__v8du) __B);
770 extern __inline __m512i
771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
772 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
774 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
775 (__v8di) __B,
776 (__v8di) __W,
777 (__mmask8) __U);
780 extern __inline __m512i
781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
784 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
785 (__v8di) __B,
786 (__v8di)
787 _mm512_setzero_si512 (),
788 (__mmask8) __U);
791 extern __inline __m512i
792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
793 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
795 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
796 (__v8di) __Y,
797 (__v8di)
798 _mm512_undefined_pd (),
799 (__mmask8) -1);
802 extern __inline __m512i
803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
806 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
807 (__v8di) __Y,
808 (__v8di) __W,
809 (__mmask8) __U);
812 extern __inline __m512i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
816 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
817 (__v8di) __Y,
818 (__v8di)
819 _mm512_setzero_si512 (),
820 (__mmask8) __U);
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
827 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
828 (__v8di) __Y,
829 (__v8di)
830 _mm512_undefined_epi32 (),
831 (__mmask8) -1);
834 extern __inline __m512i
835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
836 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
838 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
839 (__v8di) __Y,
840 (__v8di) __W,
841 (__mmask8) __U);
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
848 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
849 (__v8di) __Y,
850 (__v8di)
851 _mm512_setzero_si512 (),
852 (__mmask8) __U);
855 extern __inline __m512i
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
859 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
860 (__v8di) __Y,
861 (__v8di)
862 _mm512_undefined_epi32 (),
863 (__mmask8) -1);
866 extern __inline __m512i
867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
868 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
870 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
871 (__v8di) __Y,
872 (__v8di) __W,
873 (__mmask8) __U);
876 extern __inline __m512i
877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
880 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
881 (__v8di) __Y,
882 (__v8di)
883 _mm512_setzero_si512 (),
884 (__mmask8) __U);
887 extern __inline __m512i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm512_add_epi32 (__m512i __A, __m512i __B)
891 return (__m512i) ((__v16su) __A + (__v16su) __B);
894 extern __inline __m512i
895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
896 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
898 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
899 (__v16si) __B,
900 (__v16si) __W,
901 (__mmask16) __U);
904 extern __inline __m512i
905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
906 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
908 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
909 (__v16si) __B,
910 (__v16si)
911 _mm512_setzero_si512 (),
912 (__mmask16) __U);
915 extern __inline __m512i
916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
917 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
919 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
920 (__v16si) __Y,
921 (__v8di)
922 _mm512_undefined_epi32 (),
923 (__mmask8) -1);
926 extern __inline __m512i
927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
930 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
931 (__v16si) __Y,
932 (__v8di) __W, __M);
935 extern __inline __m512i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
939 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
940 (__v16si) __Y,
941 (__v8di)
942 _mm512_setzero_si512 (),
943 __M);
946 extern __inline __m512i
947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
948 _mm512_sub_epi32 (__m512i __A, __m512i __B)
950 return (__m512i) ((__v16su) __A - (__v16su) __B);
953 extern __inline __m512i
954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
957 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
958 (__v16si) __B,
959 (__v16si) __W,
960 (__mmask16) __U);
963 extern __inline __m512i
964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
965 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
967 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
968 (__v16si) __B,
969 (__v16si)
970 _mm512_setzero_si512 (),
971 (__mmask16) __U);
974 extern __inline __m512i
975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
978 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
979 (__v16si) __Y,
980 (__v8di)
981 _mm512_undefined_epi32 (),
982 (__mmask8) -1);
985 extern __inline __m512i
986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
989 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
990 (__v16si) __Y,
991 (__v8di) __W, __M);
994 extern __inline __m512i
995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
996 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
998 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
999 (__v16si) __Y,
1000 (__v8di)
1001 _mm512_setzero_si512 (),
1002 __M);
/* Immediate-count 64-bit left shifts.  With optimization, inline
   functions fold the immediate; without it, macro forms keep the count
   a compile-time constant as the builtin requires.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C)						   \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)				   \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1052 extern __inline __m512i
1053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054 _mm512_sll_epi64 (__m512i __A, __m128i __B)
1056 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1057 (__v2di) __B,
1058 (__v8di)
1059 _mm512_undefined_epi32 (),
1060 (__mmask8) -1);
1063 extern __inline __m512i
1064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1067 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1068 (__v2di) __B,
1069 (__v8di) __W,
1070 (__mmask8) __U);
1073 extern __inline __m512i
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1077 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1078 (__v2di) __B,
1079 (__v8di)
1080 _mm512_setzero_si512 (),
1081 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Logical right shift of packed 64-bit integers in __A by the
   immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked variant: unselected elements are taken from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masked variant: unselected elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* Macro forms keep the shift count an immediate when not optimizing.  */
#define _mm512_srli_epi64(X, C)						  \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)				  \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)				  \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1131 extern __inline __m512i
1132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1135 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1136 (__v2di) __B,
1137 (__v8di)
1138 _mm512_undefined_epi32 (),
1139 (__mmask8) -1);
1142 extern __inline __m512i
1143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1146 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1147 (__v2di) __B,
1148 (__v8di) __W,
1149 (__mmask8) __U);
1152 extern __inline __m512i
1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1156 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1157 (__v2di) __B,
1158 (__v8di)
1159 _mm512_setzero_si512 (),
1160 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Arithmetic right shift of packed 64-bit integers in __A by the
   immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked variant: unselected elements are taken from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masked variant: unselected elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* Macro forms keep the shift count an immediate when not optimizing.  */
#define _mm512_srai_epi64(X, C)						  \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C)				  \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C)				  \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1210 extern __inline __m512i
1211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1212 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1214 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1215 (__v2di) __B,
1216 (__v8di)
1217 _mm512_undefined_epi32 (),
1218 (__mmask8) -1);
1221 extern __inline __m512i
1222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1223 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1225 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1226 (__v2di) __B,
1227 (__v8di) __W,
1228 (__mmask8) __U);
1231 extern __inline __m512i
1232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1233 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1235 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1236 (__v2di) __B,
1237 (__v8di)
1238 _mm512_setzero_si512 (),
1239 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Shift packed 32-bit integers in __A left by the immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked variant: unselected elements are taken from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked variant: unselected elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro forms keep the shift count an immediate when not optimizing.  */
#define _mm512_slli_epi32(X, C)						   \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C)				   \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1289 extern __inline __m512i
1290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1293 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1294 (__v4si) __B,
1295 (__v16si)
1296 _mm512_undefined_epi32 (),
1297 (__mmask16) -1);
1300 extern __inline __m512i
1301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1304 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1305 (__v4si) __B,
1306 (__v16si) __W,
1307 (__mmask16) __U);
1310 extern __inline __m512i
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1314 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1315 (__v4si) __B,
1316 (__v16si)
1317 _mm512_setzero_si512 (),
1318 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Logical right shift of packed 32-bit integers in __A by the
   immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked variant: unselected elements are taken from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked variant: unselected elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro forms keep the shift count an immediate when not optimizing.  */
#define _mm512_srli_epi32(X, C)						   \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C)				   \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1368 extern __inline __m512i
1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1372 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1373 (__v4si) __B,
1374 (__v16si)
1375 _mm512_undefined_epi32 (),
1376 (__mmask16) -1);
1379 extern __inline __m512i
1380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1383 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1384 (__v4si) __B,
1385 (__v16si) __W,
1386 (__mmask16) __U);
1389 extern __inline __m512i
1390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1393 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1394 (__v4si) __B,
1395 (__v16si)
1396 _mm512_setzero_si512 (),
1397 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Arithmetic right shift of packed 32-bit integers in __A by the
   immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked variant: unselected elements are taken from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked variant: unselected elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro forms keep the shift count an immediate when not optimizing.  */
#define _mm512_srai_epi32(X, C)						   \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C)				   \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1447 extern __inline __m512i
1448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1451 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1452 (__v4si) __B,
1453 (__v16si)
1454 _mm512_undefined_epi32 (),
1455 (__mmask16) -1);
1458 extern __inline __m512i
1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1462 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1463 (__v4si) __B,
1464 (__v16si) __W,
1465 (__mmask16) __U);
1468 extern __inline __m512i
1469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1472 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1473 (__v4si) __B,
1474 (__v16si)
1475 _mm512_setzero_si512 (),
1476 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Scalar double add with explicit rounding mode __R; upper element
   copied from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

/* Masked variant: low element comes from __W when bit 0 of __U is
   clear.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

/* Zero-masked variant: low element zeroed when bit 0 of __U is clear.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

/* Scalar float add with explicit rounding mode __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

/* Masked variant of the scalar float add.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

/* Zero-masked variant of the scalar float add.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

/* Scalar double subtract with explicit rounding mode __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

/* Masked variant of the scalar double subtract.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

/* Zero-masked variant of the scalar double subtract.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

/* Scalar float subtract with explicit rounding mode __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

/* Masked variant of the scalar float subtract.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

/* Zero-masked variant of the scalar float subtract.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

#else
/* Macro forms keep the rounding mode an immediate when not optimizing.  */
#define _mm_add_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_mask_add_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_add_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_mask_add_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_sub_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_sub_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_subss_round(A, B, C)

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#endif
#ifdef __OPTIMIZE__
/* Bitwise ternary logic on 64-bit lanes: each result bit is selected
   from the truth table encoded in the immediate __imm, indexed by the
   corresponding bits of __A, __B and __C.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) -1);
}

/* Merge-masked variant; unselected lanes keep the value from __A.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) __U);
}

/* Zero-masked variant; unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
						      (__v8di) __B,
						      (__v8di) __C,
						      __imm, (__mmask8) __U);
}

/* Bitwise ternary logic on 32-bit lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) -1);
}

/* Merge-masked variant; unselected lanes keep the value from __A.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) __U);
}

/* Zero-masked variant; unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
						      (__v16si) __B,
						      (__v16si) __C,
						      __imm, (__mmask16) __U);
}
#else
/* Macro forms keep the truth-table byte an immediate when not
   optimizing.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)		\
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)		\
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)		\
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)		\
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)(U)))
#endif
1737 extern __inline __m512d
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm512_rcp14_pd (__m512d __A)
1741 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1742 (__v8df)
1743 _mm512_undefined_pd (),
1744 (__mmask8) -1);
1747 extern __inline __m512d
1748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1751 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1752 (__v8df) __W,
1753 (__mmask8) __U);
1756 extern __inline __m512d
1757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1758 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1760 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1761 (__v8df)
1762 _mm512_setzero_pd (),
1763 (__mmask8) __U);
1766 extern __inline __m512
1767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1768 _mm512_rcp14_ps (__m512 __A)
1770 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1771 (__v16sf)
1772 _mm512_undefined_ps (),
1773 (__mmask16) -1);
1776 extern __inline __m512
1777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1780 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1781 (__v16sf) __W,
1782 (__mmask16) __U);
1785 extern __inline __m512
1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1789 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1790 (__v16sf)
1791 _mm512_setzero_ps (),
1792 (__mmask16) __U);
1795 extern __inline __m128d
1796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1797 _mm_rcp14_sd (__m128d __A, __m128d __B)
1799 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1800 (__v2df) __A);
1803 extern __inline __m128d
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1807 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1808 (__v2df) __A,
1809 (__v2df) __W,
1810 (__mmask8) __U);
1813 extern __inline __m128d
1814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1817 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1818 (__v2df) __A,
1819 (__v2df) _mm_setzero_ps (),
1820 (__mmask8) __U);
1823 extern __inline __m128
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm_rcp14_ss (__m128 __A, __m128 __B)
1827 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1828 (__v4sf) __A);
1831 extern __inline __m128
1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1833 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1835 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1836 (__v4sf) __A,
1837 (__v4sf) __W,
1838 (__mmask8) __U);
1841 extern __inline __m128
1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1843 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1845 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1846 (__v4sf) __A,
1847 (__v4sf) _mm_setzero_ps (),
1848 (__mmask8) __U);
1851 extern __inline __m512d
1852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853 _mm512_rsqrt14_pd (__m512d __A)
1855 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1856 (__v8df)
1857 _mm512_undefined_pd (),
1858 (__mmask8) -1);
1861 extern __inline __m512d
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1865 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1866 (__v8df) __W,
1867 (__mmask8) __U);
1870 extern __inline __m512d
1871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1872 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1874 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1875 (__v8df)
1876 _mm512_setzero_pd (),
1877 (__mmask8) __U);
1880 extern __inline __m512
1881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1882 _mm512_rsqrt14_ps (__m512 __A)
1884 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1885 (__v16sf)
1886 _mm512_undefined_ps (),
1887 (__mmask16) -1);
1890 extern __inline __m512
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1894 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1895 (__v16sf) __W,
1896 (__mmask16) __U);
1899 extern __inline __m512
1900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1901 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1903 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1904 (__v16sf)
1905 _mm512_setzero_ps (),
1906 (__mmask16) __U);
1909 extern __inline __m128d
1910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1911 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1913 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1914 (__v2df) __A);
1917 extern __inline __m128d
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1921 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1922 (__v2df) __A,
1923 (__v2df) __W,
1924 (__mmask8) __U);
1927 extern __inline __m128d
1928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1929 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1931 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1932 (__v2df) __A,
1933 (__v2df) _mm_setzero_pd (),
1934 (__mmask8) __U);
1937 extern __inline __m128
1938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1939 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1941 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1942 (__v4sf) __A);
1945 extern __inline __m128
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1949 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1950 (__v4sf) __A,
1951 (__v4sf) __W,
1952 (__mmask8) __U);
1955 extern __inline __m128
1956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1957 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1959 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1960 (__v4sf) __A,
1961 (__v4sf) _mm_setzero_ps (),
1962 (__mmask8) __U);
1965 #ifdef __OPTIMIZE__
1966 extern __inline __m512d
1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1970 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1971 (__v8df)
1972 _mm512_undefined_pd (),
1973 (__mmask8) -1, __R);
1976 extern __inline __m512d
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1979 const int __R)
1981 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1982 (__v8df) __W,
1983 (__mmask8) __U, __R);
1986 extern __inline __m512d
1987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1988 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1990 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1991 (__v8df)
1992 _mm512_setzero_pd (),
1993 (__mmask8) __U, __R);
1996 extern __inline __m512
1997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1998 _mm512_sqrt_round_ps (__m512 __A, const int __R)
2000 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2001 (__v16sf)
2002 _mm512_undefined_ps (),
2003 (__mmask16) -1, __R);
2006 extern __inline __m512
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
2010 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2011 (__v16sf) __W,
2012 (__mmask16) __U, __R);
2015 extern __inline __m512
2016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2017 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
2019 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2020 (__v16sf)
2021 _mm512_setzero_ps (),
2022 (__mmask16) __U, __R);
2025 extern __inline __m128d
2026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2027 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
2029 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2030 (__v2df) __A,
2031 (__v2df)
2032 _mm_setzero_pd (),
2033 (__mmask8) -1, __R);
2036 extern __inline __m128d
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2039 const int __R)
2041 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2042 (__v2df) __A,
2043 (__v2df) __W,
2044 (__mmask8) __U, __R);
2047 extern __inline __m128d
2048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2049 _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
2051 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2052 (__v2df) __A,
2053 (__v2df)
2054 _mm_setzero_pd (),
2055 (__mmask8) __U, __R);
2058 extern __inline __m128
2059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2060 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
2062 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2063 (__v4sf) __A,
2064 (__v4sf)
2065 _mm_setzero_ps (),
2066 (__mmask8) -1, __R);
2069 extern __inline __m128
2070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2071 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2072 const int __R)
2074 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2075 (__v4sf) __A,
2076 (__v4sf) __W,
2077 (__mmask8) __U, __R);
2080 extern __inline __m128
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2084 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2085 (__v4sf) __A,
2086 (__v4sf)
2087 _mm_setzero_ps (),
2088 (__mmask8) __U, __R);
2090 #else
2091 #define _mm512_sqrt_round_pd(A, C) \
2092 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
2094 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
2095 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
2097 #define _mm512_maskz_sqrt_round_pd(U, A, C) \
2098 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
2100 #define _mm512_sqrt_round_ps(A, C) \
2101 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
2103 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
2104 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
2106 #define _mm512_maskz_sqrt_round_ps(U, A, C) \
2107 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
2109 #define _mm_sqrt_round_sd(A, B, C) \
2110 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
2111 (__v2df) _mm_setzero_pd (), -1, C)
2113 #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
2114 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
2116 #define _mm_maskz_sqrt_round_sd(U, A, B, C) \
2117 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
2118 (__v2df) _mm_setzero_pd (), U, C)
2120 #define _mm_sqrt_round_ss(A, B, C) \
2121 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
2122 (__v4sf) _mm_setzero_ps (), -1, C)
2124 #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
2125 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
2127 #define _mm_maskz_sqrt_round_ss(U, A, B, C) \
2128 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
2129 (__v4sf) _mm_setzero_ps (), U, C)
2130 #endif
2132 extern __inline __m512i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm512_cvtepi8_epi32 (__m128i __A)
2136 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2137 (__v16si)
2138 _mm512_undefined_epi32 (),
2139 (__mmask16) -1);
2142 extern __inline __m512i
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2146 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2147 (__v16si) __W,
2148 (__mmask16) __U);
2151 extern __inline __m512i
2152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2153 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2155 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2156 (__v16si)
2157 _mm512_setzero_si512 (),
2158 (__mmask16) __U);
2161 extern __inline __m512i
2162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163 _mm512_cvtepi8_epi64 (__m128i __A)
2165 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2166 (__v8di)
2167 _mm512_undefined_epi32 (),
2168 (__mmask8) -1);
2171 extern __inline __m512i
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2175 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2176 (__v8di) __W,
2177 (__mmask8) __U);
2180 extern __inline __m512i
2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2184 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2185 (__v8di)
2186 _mm512_setzero_si512 (),
2187 (__mmask8) __U);
2190 extern __inline __m512i
2191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192 _mm512_cvtepi16_epi32 (__m256i __A)
2194 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2195 (__v16si)
2196 _mm512_undefined_epi32 (),
2197 (__mmask16) -1);
2200 extern __inline __m512i
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2204 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2205 (__v16si) __W,
2206 (__mmask16) __U);
2209 extern __inline __m512i
2210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2211 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2213 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2214 (__v16si)
2215 _mm512_setzero_si512 (),
2216 (__mmask16) __U);
2219 extern __inline __m512i
2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221 _mm512_cvtepi16_epi64 (__m128i __A)
2223 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2224 (__v8di)
2225 _mm512_undefined_epi32 (),
2226 (__mmask8) -1);
2229 extern __inline __m512i
2230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2233 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2234 (__v8di) __W,
2235 (__mmask8) __U);
2238 extern __inline __m512i
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2242 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2243 (__v8di)
2244 _mm512_setzero_si512 (),
2245 (__mmask8) __U);
2248 extern __inline __m512i
2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250 _mm512_cvtepi32_epi64 (__m256i __X)
2252 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2253 (__v8di)
2254 _mm512_undefined_epi32 (),
2255 (__mmask8) -1);
2258 extern __inline __m512i
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2262 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2263 (__v8di) __W,
2264 (__mmask8) __U);
2267 extern __inline __m512i
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2271 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2272 (__v8di)
2273 _mm512_setzero_si512 (),
2274 (__mmask8) __U);
2277 extern __inline __m512i
2278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279 _mm512_cvtepu8_epi32 (__m128i __A)
2281 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2282 (__v16si)
2283 _mm512_undefined_epi32 (),
2284 (__mmask16) -1);
2287 extern __inline __m512i
2288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2289 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2291 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2292 (__v16si) __W,
2293 (__mmask16) __U);
2296 extern __inline __m512i
2297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2298 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2300 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2301 (__v16si)
2302 _mm512_setzero_si512 (),
2303 (__mmask16) __U);
2306 extern __inline __m512i
2307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2308 _mm512_cvtepu8_epi64 (__m128i __A)
2310 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2311 (__v8di)
2312 _mm512_undefined_epi32 (),
2313 (__mmask8) -1);
2316 extern __inline __m512i
2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2320 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2321 (__v8di) __W,
2322 (__mmask8) __U);
2325 extern __inline __m512i
2326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2327 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2329 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2330 (__v8di)
2331 _mm512_setzero_si512 (),
2332 (__mmask8) __U);
2335 extern __inline __m512i
2336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337 _mm512_cvtepu16_epi32 (__m256i __A)
2339 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2340 (__v16si)
2341 _mm512_undefined_epi32 (),
2342 (__mmask16) -1);
2345 extern __inline __m512i
2346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2349 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2350 (__v16si) __W,
2351 (__mmask16) __U);
2354 extern __inline __m512i
2355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2358 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2359 (__v16si)
2360 _mm512_setzero_si512 (),
2361 (__mmask16) __U);
2364 extern __inline __m512i
2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366 _mm512_cvtepu16_epi64 (__m128i __A)
2368 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2369 (__v8di)
2370 _mm512_undefined_epi32 (),
2371 (__mmask8) -1);
2374 extern __inline __m512i
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2378 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2379 (__v8di) __W,
2380 (__mmask8) __U);
2383 extern __inline __m512i
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2387 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2388 (__v8di)
2389 _mm512_setzero_si512 (),
2390 (__mmask8) __U);
2393 extern __inline __m512i
2394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2395 _mm512_cvtepu32_epi64 (__m256i __X)
2397 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2398 (__v8di)
2399 _mm512_undefined_epi32 (),
2400 (__mmask8) -1);
2403 extern __inline __m512i
2404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2407 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2408 (__v8di) __W,
2409 (__mmask8) __U);
2412 extern __inline __m512i
2413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2416 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2417 (__v8di)
2418 _mm512_setzero_si512 (),
2419 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Packed FP add/sub with explicit rounding-mode control (__R).
   Three masking variants per operation: plain, merge-masking (_mask_)
   and zero-masking (_maskz_).  Inline functions are usable only when
   optimizing, so that __R is provably a compile-time constant; the
   #else branch provides equivalent macros.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
#define _mm512_add_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_add_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_sub_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sub_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
#ifdef __OPTIMIZE__
/* Packed and scalar FP multiply/divide with explicit rounding-mode
   control (__R).  Packed forms have plain/merge-masked/zero-masked
   variants; the plain scalar forms use the unmasked builtins.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

/* Division: __M is the dividend, __V the divisor.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
			  __m512d __V, const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
			   const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

/* Scalar forms: operate on the low element, upper elements copied
   from __A.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}
#else
#define _mm512_mul_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_mul_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_div_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_div_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_mul_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_mulsd_round(A, B, C)

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)

#define _mm_maskz_mul_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_mul_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_mulss_round(A, B, C)

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)

#define _mm_maskz_mul_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_div_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_divsd_round(A, B, C)

#define _mm_mask_div_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)

#define _mm_maskz_div_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_div_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_divss_round(A, B, C)

#define _mm_mask_div_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)

#define _mm_maskz_div_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
#ifdef __OPTIMIZE__
/* Packed FP max/min with explicit rounding-mode control (__R), in
   plain, merge-masked and zero-masked variants.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
#define _mm512_max_round_pd(A, B,  R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_pd(U, A,  B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)

/* Bug fix: the pass-through here was _mm512_undefined_pd(), a __m512d,
   which cannot be cast to __v16sf; the ps variant must use
   _mm512_undefined_ps() like its siblings.  */
#define _mm512_max_round_ps(A, B,  R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_ps(U, A,  B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)

#define _mm512_min_round_pd(A, B,  R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_pd(U, A,  B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_min_round_ps(A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_ps(U, A,  B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#endif
3104 #ifdef __OPTIMIZE__
3105 extern __inline __m512d
3106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3107 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3109 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3110 (__v8df) __B,
3111 (__v8df)
3112 _mm512_undefined_pd (),
3113 (__mmask8) -1, __R);
3116 extern __inline __m512d
3117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3118 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3119 __m512d __B, const int __R)
3121 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3122 (__v8df) __B,
3123 (__v8df) __W,
3124 (__mmask8) __U, __R);
3127 extern __inline __m512d
3128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3129 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3130 const int __R)
3132 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3133 (__v8df) __B,
3134 (__v8df)
3135 _mm512_setzero_pd (),
3136 (__mmask8) __U, __R);
3139 extern __inline __m512
3140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3143 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3144 (__v16sf) __B,
3145 (__v16sf)
3146 _mm512_undefined_ps (),
3147 (__mmask16) -1, __R);
3150 extern __inline __m512
3151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3152 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3153 __m512 __B, const int __R)
3155 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3156 (__v16sf) __B,
3157 (__v16sf) __W,
3158 (__mmask16) __U, __R);
3161 extern __inline __m512
3162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3163 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3164 const int __R)
3166 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3167 (__v16sf) __B,
3168 (__v16sf)
3169 _mm512_setzero_ps (),
3170 (__mmask16) __U, __R);
3173 extern __inline __m128d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3177 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3178 (__v2df) __B,
3179 (__v2df)
3180 _mm_setzero_pd (),
3181 (__mmask8) -1, __R);
3184 extern __inline __m128d
3185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3187 const int __R)
3189 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3190 (__v2df) __B,
3191 (__v2df) __W,
3192 (__mmask8) __U, __R);
3195 extern __inline __m128d
3196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3198 const int __R)
3200 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3201 (__v2df) __B,
3202 (__v2df)
3203 _mm_setzero_pd (),
3204 (__mmask8) __U, __R);
3207 extern __inline __m128
3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3211 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3212 (__v4sf) __B,
3213 (__v4sf)
3214 _mm_setzero_ps (),
3215 (__mmask8) -1, __R);
3218 extern __inline __m128
3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3221 const int __R)
3223 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3224 (__v4sf) __B,
3225 (__v4sf) __W,
3226 (__mmask8) __U, __R);
3229 extern __inline __m128
3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3233 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3234 (__v4sf) __B,
3235 (__v4sf)
3236 _mm_setzero_ps (),
3237 (__mmask8) __U, __R);
3239 #else
3240 #define _mm512_scalef_round_pd(A, B, C) \
3241 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
3243 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
3244 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
3246 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
3247 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
3249 #define _mm512_scalef_round_ps(A, B, C) \
3250 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
3252 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
3253 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
3255 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
3256 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
3258 #define _mm_scalef_round_sd(A, B, C) \
3259 (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
3260 (__v2df)_mm_setzero_pd (), -1, C)
3262 #define _mm_scalef_round_ss(A, B, C) \
3263 (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
3264 (__v4sf)_mm_setzero_ps (), -1, C)
3265 #endif
3267 #ifdef __OPTIMIZE__
3268 extern __inline __m512d
3269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3270 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3272 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3273 (__v8df) __B,
3274 (__v8df) __C,
3275 (__mmask8) -1, __R);
3278 extern __inline __m512d
3279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3280 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3281 __m512d __C, const int __R)
3283 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3284 (__v8df) __B,
3285 (__v8df) __C,
3286 (__mmask8) __U, __R);
3289 extern __inline __m512d
3290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3291 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3292 __mmask8 __U, const int __R)
3294 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3295 (__v8df) __B,
3296 (__v8df) __C,
3297 (__mmask8) __U, __R);
3300 extern __inline __m512d
3301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3303 __m512d __C, const int __R)
3305 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3306 (__v8df) __B,
3307 (__v8df) __C,
3308 (__mmask8) __U, __R);
3311 extern __inline __m512
3312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3315 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3316 (__v16sf) __B,
3317 (__v16sf) __C,
3318 (__mmask16) -1, __R);
3321 extern __inline __m512
3322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3324 __m512 __C, const int __R)
3326 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3327 (__v16sf) __B,
3328 (__v16sf) __C,
3329 (__mmask16) __U, __R);
3332 extern __inline __m512
3333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3334 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3335 __mmask16 __U, const int __R)
3337 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3338 (__v16sf) __B,
3339 (__v16sf) __C,
3340 (__mmask16) __U, __R);
3343 extern __inline __m512
3344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3345 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3346 __m512 __C, const int __R)
3348 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3349 (__v16sf) __B,
3350 (__v16sf) __C,
3351 (__mmask16) __U, __R);
3354 extern __inline __m512d
3355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3356 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3358 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
3359 (__v8df) __B,
3360 (__v8df) __C,
3361 (__mmask8) -1, __R);
3364 extern __inline __m512d
3365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3367 __m512d __C, const int __R)
3369 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
3370 (__v8df) __B,
3371 (__v8df) __C,
3372 (__mmask8) __U, __R);
3375 extern __inline __m512d
3376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3377 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3378 __mmask8 __U, const int __R)
3380 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3381 (__v8df) __B,
3382 (__v8df) __C,
3383 (__mmask8) __U, __R);
3386 extern __inline __m512d
3387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3388 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3389 __m512d __C, const int __R)
3391 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
3392 (__v8df) __B,
3393 (__v8df) __C,
3394 (__mmask8) __U, __R);
3397 extern __inline __m512
3398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3401 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
3402 (__v16sf) __B,
3403 (__v16sf) __C,
3404 (__mmask16) -1, __R);
3407 extern __inline __m512
3408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3410 __m512 __C, const int __R)
3412 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
3413 (__v16sf) __B,
3414 (__v16sf) __C,
3415 (__mmask16) __U, __R);
3418 extern __inline __m512
3419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3420 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3421 __mmask16 __U, const int __R)
3423 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3424 (__v16sf) __B,
3425 (__v16sf) __C,
3426 (__mmask16) __U, __R);
3429 extern __inline __m512
3430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3431 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3432 __m512 __C, const int __R)
3434 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
3435 (__v16sf) __B,
3436 (__v16sf) __C,
3437 (__mmask16) __U, __R);
3440 extern __inline __m512d
3441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3442 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3444 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3445 (__v8df) __B,
3446 (__v8df) __C,
3447 (__mmask8) -1, __R);
3450 extern __inline __m512d
3451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3452 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3453 __m512d __C, const int __R)
3455 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3456 (__v8df) __B,
3457 (__v8df) __C,
3458 (__mmask8) __U, __R);
3461 extern __inline __m512d
3462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3463 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3464 __mmask8 __U, const int __R)
3466 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3467 (__v8df) __B,
3468 (__v8df) __C,
3469 (__mmask8) __U, __R);
3472 extern __inline __m512d
3473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3475 __m512d __C, const int __R)
3477 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3478 (__v8df) __B,
3479 (__v8df) __C,
3480 (__mmask8) __U, __R);
3483 extern __inline __m512
3484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3485 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3487 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3488 (__v16sf) __B,
3489 (__v16sf) __C,
3490 (__mmask16) -1, __R);
3493 extern __inline __m512
3494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3495 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3496 __m512 __C, const int __R)
3498 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3499 (__v16sf) __B,
3500 (__v16sf) __C,
3501 (__mmask16) __U, __R);
3504 extern __inline __m512
3505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3506 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3507 __mmask16 __U, const int __R)
3509 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3510 (__v16sf) __B,
3511 (__v16sf) __C,
3512 (__mmask16) __U, __R);
3515 extern __inline __m512
3516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3518 __m512 __C, const int __R)
3520 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3521 (__v16sf) __B,
3522 (__v16sf) __C,
3523 (__mmask16) __U, __R);
3526 extern __inline __m512d
3527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3528 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3530 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3531 (__v8df) __B,
3532 -(__v8df) __C,
3533 (__mmask8) -1, __R);
3536 extern __inline __m512d
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3539 __m512d __C, const int __R)
3541 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3542 (__v8df) __B,
3543 -(__v8df) __C,
3544 (__mmask8) __U, __R);
3547 extern __inline __m512d
3548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3549 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3550 __mmask8 __U, const int __R)
3552 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3553 (__v8df) __B,
3554 (__v8df) __C,
3555 (__mmask8) __U, __R);
3558 extern __inline __m512d
3559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3561 __m512d __C, const int __R)
3563 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3564 (__v8df) __B,
3565 -(__v8df) __C,
3566 (__mmask8) __U, __R);
3569 extern __inline __m512
3570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3573 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3574 (__v16sf) __B,
3575 -(__v16sf) __C,
3576 (__mmask16) -1, __R);
3579 extern __inline __m512
3580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3581 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3582 __m512 __C, const int __R)
3584 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3585 (__v16sf) __B,
3586 -(__v16sf) __C,
3587 (__mmask16) __U, __R);
3590 extern __inline __m512
3591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3592 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3593 __mmask16 __U, const int __R)
3595 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3596 (__v16sf) __B,
3597 (__v16sf) __C,
3598 (__mmask16) __U, __R);
3601 extern __inline __m512
3602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3604 __m512 __C, const int __R)
3606 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3607 (__v16sf) __B,
3608 -(__v16sf) __C,
3609 (__mmask16) __U, __R);
3612 extern __inline __m512d
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3616 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3617 (__v8df) __B,
3618 (__v8df) __C,
3619 (__mmask8) -1, __R);
3622 extern __inline __m512d
3623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3625 __m512d __C, const int __R)
3627 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3628 (__v8df) __B,
3629 (__v8df) __C,
3630 (__mmask8) __U, __R);
3633 extern __inline __m512d
3634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3635 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3636 __mmask8 __U, const int __R)
3638 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
3639 (__v8df) __B,
3640 (__v8df) __C,
3641 (__mmask8) __U, __R);
3644 extern __inline __m512d
3645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3646 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3647 __m512d __C, const int __R)
3649 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
3650 (__v8df) __B,
3651 (__v8df) __C,
3652 (__mmask8) __U, __R);
3655 extern __inline __m512
3656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3659 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3660 (__v16sf) __B,
3661 (__v16sf) __C,
3662 (__mmask16) -1, __R);
3665 extern __inline __m512
3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3668 __m512 __C, const int __R)
3670 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3671 (__v16sf) __B,
3672 (__v16sf) __C,
3673 (__mmask16) __U, __R);
3676 extern __inline __m512
3677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3678 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3679 __mmask16 __U, const int __R)
3681 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
3682 (__v16sf) __B,
3683 (__v16sf) __C,
3684 (__mmask16) __U, __R);
3687 extern __inline __m512
3688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3690 __m512 __C, const int __R)
3692 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
3693 (__v16sf) __B,
3694 (__v16sf) __C,
3695 (__mmask16) __U, __R);
3698 extern __inline __m512d
3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3702 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3703 (__v8df) __B,
3704 (__v8df) __C,
3705 (__mmask8) -1, __R);
3708 extern __inline __m512d
3709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3711 __m512d __C, const int __R)
3713 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3714 (__v8df) __B,
3715 (__v8df) __C,
3716 (__mmask8) __U, __R);
3719 extern __inline __m512d
3720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3721 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3722 __mmask8 __U, const int __R)
3724 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3725 (__v8df) __B,
3726 (__v8df) __C,
3727 (__mmask8) __U, __R);
3730 extern __inline __m512d
3731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3732 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3733 __m512d __C, const int __R)
3735 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
3736 (__v8df) __B,
3737 (__v8df) __C,
3738 (__mmask8) __U, __R);
3741 extern __inline __m512
3742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3743 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3745 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3746 (__v16sf) __B,
3747 (__v16sf) __C,
3748 (__mmask16) -1, __R);
3751 extern __inline __m512
3752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3753 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3754 __m512 __C, const int __R)
3756 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3757 (__v16sf) __B,
3758 (__v16sf) __C,
3759 (__mmask16) __U, __R);
3762 extern __inline __m512
3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3765 __mmask16 __U, const int __R)
3767 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3768 (__v16sf) __B,
3769 (__v16sf) __C,
3770 (__mmask16) __U, __R);
3773 extern __inline __m512
3774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3776 __m512 __C, const int __R)
3778 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
3779 (__v16sf) __B,
3780 (__v16sf) __C,
3781 (__mmask16) __U, __R);
3783 #else
3784 #define _mm512_fmadd_round_pd(A, B, C, R) \
3785 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3787 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3788 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3790 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3791 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3793 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3794 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3796 #define _mm512_fmadd_round_ps(A, B, C, R) \
3797 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3799 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3800 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3802 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3803 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3805 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3806 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3808 #define _mm512_fmsub_round_pd(A, B, C, R) \
3809 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
3811 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3812 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
3814 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3815 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3817 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3818 (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
3820 #define _mm512_fmsub_round_ps(A, B, C, R) \
3821 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
3823 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3824 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
3826 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3827 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3829 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3830 (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
3832 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3833 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3835 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3836 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3838 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3839 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3841 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3842 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3844 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3845 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3847 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3848 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3850 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3851 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3853 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3854 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3856 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3857 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3859 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3860 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3862 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3863 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3865 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3866 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3868 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3869 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3871 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3872 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3874 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3875 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3877 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3878 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3880 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3881 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R)
3883 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3884 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3886 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3887 (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R)
3889 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3890 (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R)
3892 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3893 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R)
3895 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3896 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3898 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3899 (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R)
3901 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3902 (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
3904 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3905 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
3907 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3908 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3910 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3911 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3913 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3914 (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
3916 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3917 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
3919 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3920 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3922 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3923 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3925 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3926 (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
3927 #endif
3929 extern __inline __m512i
3930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931 _mm512_abs_epi64 (__m512i __A)
3933 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3934 (__v8di)
3935 _mm512_undefined_epi32 (),
3936 (__mmask8) -1);
3939 extern __inline __m512i
3940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3941 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3943 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3944 (__v8di) __W,
3945 (__mmask8) __U);
3948 extern __inline __m512i
3949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3950 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3952 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3953 (__v8di)
3954 _mm512_setzero_si512 (),
3955 (__mmask8) __U);
3958 extern __inline __m512i
3959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3960 _mm512_abs_epi32 (__m512i __A)
3962 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3963 (__v16si)
3964 _mm512_undefined_epi32 (),
3965 (__mmask16) -1);
3968 extern __inline __m512i
3969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3970 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3972 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3973 (__v16si) __W,
3974 (__mmask16) __U);
3977 extern __inline __m512i
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3981 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3982 (__v16si)
3983 _mm512_setzero_si512 (),
3984 (__mmask16) __U);
3987 extern __inline __m512
3988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3989 _mm512_broadcastss_ps (__m128 __A)
3991 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3992 (__v16sf)
3993 _mm512_undefined_ps (),
3994 (__mmask16) -1);
3997 extern __inline __m512
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
4001 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4002 (__v16sf) __O, __M);
4005 extern __inline __m512
4006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4007 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
4009 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4010 (__v16sf)
4011 _mm512_setzero_ps (),
4012 __M);
4015 extern __inline __m512d
4016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4017 _mm512_broadcastsd_pd (__m128d __A)
4019 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4020 (__v8df)
4021 _mm512_undefined_pd (),
4022 (__mmask8) -1);
4025 extern __inline __m512d
4026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4027 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
4029 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4030 (__v8df) __O, __M);
4033 extern __inline __m512d
4034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4035 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
4037 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
4038 (__v8df)
4039 _mm512_setzero_pd (),
4040 __M);
4043 extern __inline __m512i
4044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4045 _mm512_broadcastd_epi32 (__m128i __A)
4047 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4048 (__v16si)
4049 _mm512_undefined_epi32 (),
4050 (__mmask16) -1);
4053 extern __inline __m512i
4054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
4057 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4058 (__v16si) __O, __M);
4061 extern __inline __m512i
4062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4063 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
4065 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
4066 (__v16si)
4067 _mm512_setzero_si512 (),
4068 __M);
4071 extern __inline __m512i
4072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4073 _mm512_set1_epi32 (int __A)
4075 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4076 (__v16si)
4077 _mm512_undefined_epi32 (),
4078 (__mmask16)(-1));
4081 extern __inline __m512i
4082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4083 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4085 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4086 __M);
4089 extern __inline __m512i
4090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4091 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4093 return (__m512i)
4094 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4095 (__v16si) _mm512_setzero_si512 (),
4096 __M);
4099 extern __inline __m512i
4100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4101 _mm512_broadcastq_epi64 (__m128i __A)
4103 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4104 (__v8di)
4105 _mm512_undefined_epi32 (),
4106 (__mmask8) -1);
4109 extern __inline __m512i
4110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4113 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4114 (__v8di) __O, __M);
4117 extern __inline __m512i
4118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4119 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4121 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4122 (__v8di)
4123 _mm512_setzero_si512 (),
4124 __M);
4127 extern __inline __m512i
4128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4129 _mm512_set1_epi64 (long long __A)
4131 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4132 (__v8di)
4133 _mm512_undefined_epi32 (),
4134 (__mmask8)(-1));
4137 extern __inline __m512i
4138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4141 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4142 __M);
4145 extern __inline __m512i
4146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4147 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4149 return (__m512i)
4150 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4151 (__v8di) _mm512_setzero_si512 (),
4152 __M);
4155 extern __inline __m512
4156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4157 _mm512_broadcast_f32x4 (__m128 __A)
4159 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4160 (__v16sf)
4161 _mm512_undefined_ps (),
4162 (__mmask16) -1);
4165 extern __inline __m512
4166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4167 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4169 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4170 (__v16sf) __O,
4171 __M);
4174 extern __inline __m512
4175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4176 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4178 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4179 (__v16sf)
4180 _mm512_setzero_ps (),
4181 __M);
4184 extern __inline __m512i
4185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4186 _mm512_broadcast_i32x4 (__m128i __A)
4188 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4189 (__v16si)
4190 _mm512_undefined_epi32 (),
4191 (__mmask16) -1);
4194 extern __inline __m512i
4195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4198 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4199 (__v16si) __O,
4200 __M);
4203 extern __inline __m512i
4204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4205 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4207 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4208 (__v16si)
4209 _mm512_setzero_si512 (),
4210 __M);
4213 extern __inline __m512d
4214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4215 _mm512_broadcast_f64x4 (__m256d __A)
4217 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4218 (__v8df)
4219 _mm512_undefined_pd (),
4220 (__mmask8) -1);
4223 extern __inline __m512d
4224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4225 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4227 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4228 (__v8df) __O,
4229 __M);
4232 extern __inline __m512d
4233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4236 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4237 (__v8df)
4238 _mm512_setzero_pd (),
4239 __M);
4242 extern __inline __m512i
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 _mm512_broadcast_i64x4 (__m256i __A)
4246 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4247 (__v8di)
4248 _mm512_undefined_epi32 (),
4249 (__mmask8) -1);
4252 extern __inline __m512i
4253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4256 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4257 (__v8di) __O,
4258 __M);
4261 extern __inline __m512i
4262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4263 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4265 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4266 (__v8di)
4267 _mm512_setzero_si512 (),
4268 __M);
/* Shuffle-control constants for _mm512_shuffle_epi32 and friends.
   Each letter picks one 32-bit element of a 128-bit lane (A = element 0
   ... D = element 3), listed from the highest result position to the
   lowest, so the value encodes four 2-bit selectors.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
#ifdef __OPTIMIZE__
/* Shuffle 32-bit elements within each 128-bit lane of __A according to
   the immediate selector __mask (VPSHUFD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; elements with a clear bit in __U keep the value from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			   _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; elements with a clear bit in __U are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Shuffle 128-bit lanes selected from __A and __B by immediate __imm,
   64-bit-element flavor (VSHUFI64X2).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_undefined_epi32 (),
						   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di) __W,
						   (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   (__mmask8) __U);
}

/* Shuffle 128-bit lanes selected from __A and __B by immediate __imm,
   32-bit-element flavor (VSHUFI32X4).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si)
						   _mm512_undefined_epi32 (),
						   (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si) __W,
						   (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si)
						   _mm512_setzero_si512 (),
						   (__mmask16) __U);
}

/* Same lane shuffles for double-precision vectors (VSHUFF64X2).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
			   __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df) __W,
						   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
			    const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

/* Same lane shuffles for single-precision vectors (VSHUFF32X4).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
			   __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
			    const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

#else
/* Without optimization the immediate argument cannot be proven constant
   through an inline function, so provide macro forms instead.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))

#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_f64x2(X, Y, C)                                   \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_shuffle_f32x4(X, Y, C)                                   \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_undefined_ps(),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                        \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                          \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_setzero_ps(),\
    (__mmask16)(U)))
#endif
4623 extern __inline __m512i
4624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4625 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4627 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4628 (__v16si) __B,
4629 (__v16si)
4630 _mm512_undefined_epi32 (),
4631 (__mmask16) -1);
4634 extern __inline __m512i
4635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4636 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4638 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4639 (__v16si) __B,
4640 (__v16si) __W,
4641 (__mmask16) __U);
4644 extern __inline __m512i
4645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4646 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4648 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4649 (__v16si) __B,
4650 (__v16si)
4651 _mm512_setzero_si512 (),
4652 (__mmask16) __U);
4655 extern __inline __m512i
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4659 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4660 (__v16si) __B,
4661 (__v16si)
4662 _mm512_undefined_epi32 (),
4663 (__mmask16) -1);
4666 extern __inline __m512i
4667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4670 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4671 (__v16si) __B,
4672 (__v16si) __W,
4673 (__mmask16) __U);
4676 extern __inline __m512i
4677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4680 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4681 (__v16si) __B,
4682 (__v16si)
4683 _mm512_setzero_si512 (),
4684 (__mmask16) __U);
4687 extern __inline __m512i
4688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4689 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4691 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4692 (__v8di) __B,
4693 (__v8di)
4694 _mm512_undefined_epi32 (),
4695 (__mmask8) -1);
4698 extern __inline __m512i
4699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4700 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4702 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4703 (__v8di) __B,
4704 (__v8di) __W,
4705 (__mmask8) __U);
4708 extern __inline __m512i
4709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4710 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4712 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4713 (__v8di) __B,
4714 (__v8di)
4715 _mm512_setzero_si512 (),
4716 (__mmask8) __U);
4719 extern __inline __m512i
4720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4721 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4723 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4724 (__v8di) __B,
4725 (__v8di)
4726 _mm512_undefined_epi32 (),
4727 (__mmask8) -1);
4730 extern __inline __m512i
4731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4732 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4734 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4735 (__v8di) __B,
4736 (__v8di) __W,
4737 (__mmask8) __U);
4740 extern __inline __m512i
4741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4744 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4745 (__v8di) __B,
4746 (__v8di)
4747 _mm512_setzero_si512 (),
4748 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Truncating conversion of 8 doubles in __A to signed 32-bit integers
   with SAE control __R (VCVTTPD2DQ).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}

/* Truncating conversion of 8 doubles in __A to unsigned 32-bit integers
   with SAE control __R (VCVTTPD2UDQ).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_undefined_si256 (),
						      (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si) __W,
						      (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_setzero_si256 (),
						      (__mmask8) __U, __R);
}
#else
/* Macro forms so the rounding immediate stays constant without
   optimization.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif
#ifdef __OPTIMIZE__
/* Rounding conversion of 8 doubles in __A to signed 32-bit integers
   under rounding mode __R (VCVTPD2DQ).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_undefined_si256 (),
						    (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_setzero_si256 (),
						    (__mmask8) __U, __R);
}

/* Rounding conversion of 8 doubles in __A to unsigned 32-bit integers
   under rounding mode __R (VCVTPD2UDQ).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}
#else
/* Macro forms so the rounding immediate stays constant without
   optimization.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif
#ifdef __OPTIMIZE__
/* Truncating conversion of 16 floats in __A to signed 32-bit integers
   with SAE control __R (VCVTTPS2DQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}

/* Truncating conversion of 16 floats in __A to unsigned 32-bit integers
   with SAE control __R (VCVTTPS2UDQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_undefined_epi32 (),
						      (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si) __W,
						      (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      (__mmask16) __U, __R);
}
#else
/* Macro forms so the rounding immediate stays constant without
   optimization.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
/* Rounding float -> (un)signed 32-bit conversions with an explicit
   rounding-mode operand __R (cvtps2dq512 / cvtps2udq512 builtins).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_undefined_epi32 (),
						    (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}
#else
#define _mm512_cvt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
5071 extern __inline __m128d
5072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 _mm_cvtu32_sd (__m128d __A, unsigned __B)
5075 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
/* 64-bit integer -> double scalar conversions with explicit rounding
   operand; only available in 64-bit mode.  _mm_cvt_roundi64_sd and
   _mm_cvt_roundsi64_sd are aliases over the same builtin.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)

#define _mm_cvt_roundi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
#endif

#endif
/* 32-bit integer -> float scalar conversions with explicit rounding
   operand.  _mm_cvt_roundsi32_ss and _mm_cvt_roundi32_ss are aliases
   over the same builtin.  */
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)

#define _mm_cvt_roundi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
#endif
/* 64-bit integer -> float scalar conversions with explicit rounding
   operand; only available in 64-bit mode.  _mm_cvt_roundsi64_ss and
   _mm_cvt_roundi64_ss are aliases over the same builtin.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)

#define _mm_cvt_roundi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
#endif

#endif
5180 extern __inline __m128i
5181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182 _mm512_cvtepi32_epi8 (__m512i __A)
5184 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5185 (__v16qi)
5186 _mm_undefined_si128 (),
5187 (__mmask16) -1);
5190 extern __inline void
5191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5192 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5194 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5197 extern __inline __m128i
5198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5199 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5201 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5202 (__v16qi) __O, __M);
5205 extern __inline __m128i
5206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5207 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5209 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5210 (__v16qi)
5211 _mm_setzero_si128 (),
5212 __M);
5215 extern __inline __m128i
5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217 _mm512_cvtsepi32_epi8 (__m512i __A)
5219 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5220 (__v16qi)
5221 _mm_undefined_si128 (),
5222 (__mmask16) -1);
5225 extern __inline void
5226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5227 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5229 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5232 extern __inline __m128i
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5236 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5237 (__v16qi) __O, __M);
5240 extern __inline __m128i
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5244 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5245 (__v16qi)
5246 _mm_setzero_si128 (),
5247 __M);
5250 extern __inline __m128i
5251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252 _mm512_cvtusepi32_epi8 (__m512i __A)
5254 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5255 (__v16qi)
5256 _mm_undefined_si128 (),
5257 (__mmask16) -1);
5260 extern __inline void
5261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5262 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5264 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5267 extern __inline __m128i
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5271 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5272 (__v16qi) __O,
5273 __M);
5276 extern __inline __m128i
5277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5278 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5280 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5281 (__v16qi)
5282 _mm_setzero_si128 (),
5283 __M);
5286 extern __inline __m256i
5287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5288 _mm512_cvtepi32_epi16 (__m512i __A)
5290 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5291 (__v16hi)
5292 _mm256_undefined_si256 (),
5293 (__mmask16) -1);
5296 extern __inline void
5297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5298 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5300 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5303 extern __inline __m256i
5304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5307 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5308 (__v16hi) __O, __M);
5311 extern __inline __m256i
5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5315 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5316 (__v16hi)
5317 _mm256_setzero_si256 (),
5318 __M);
5321 extern __inline __m256i
5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323 _mm512_cvtsepi32_epi16 (__m512i __A)
5325 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5326 (__v16hi)
5327 _mm256_undefined_si256 (),
5328 (__mmask16) -1);
5331 extern __inline void
5332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5333 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5335 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5338 extern __inline __m256i
5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5342 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5343 (__v16hi) __O, __M);
5346 extern __inline __m256i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5350 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5351 (__v16hi)
5352 _mm256_setzero_si256 (),
5353 __M);
5356 extern __inline __m256i
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_cvtusepi32_epi16 (__m512i __A)
5360 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5361 (__v16hi)
5362 _mm256_undefined_si256 (),
5363 (__mmask16) -1);
5366 extern __inline void
5367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5370 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5373 extern __inline __m256i
5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5377 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5378 (__v16hi) __O,
5379 __M);
5382 extern __inline __m256i
5383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5384 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5386 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5387 (__v16hi)
5388 _mm256_setzero_si256 (),
5389 __M);
5392 extern __inline __m256i
5393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5394 _mm512_cvtepi64_epi32 (__m512i __A)
5396 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5397 (__v8si)
5398 _mm256_undefined_si256 (),
5399 (__mmask8) -1);
5402 extern __inline void
5403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5404 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5406 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5409 extern __inline __m256i
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5413 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5414 (__v8si) __O, __M);
5417 extern __inline __m256i
5418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5419 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5421 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5422 (__v8si)
5423 _mm256_setzero_si256 (),
5424 __M);
5427 extern __inline __m256i
5428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5429 _mm512_cvtsepi64_epi32 (__m512i __A)
5431 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5432 (__v8si)
5433 _mm256_undefined_si256 (),
5434 (__mmask8) -1);
5437 extern __inline void
5438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5439 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5441 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5444 extern __inline __m256i
5445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5446 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5448 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5449 (__v8si) __O, __M);
5452 extern __inline __m256i
5453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5454 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5456 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5457 (__v8si)
5458 _mm256_setzero_si256 (),
5459 __M);
5462 extern __inline __m256i
5463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5464 _mm512_cvtusepi64_epi32 (__m512i __A)
5466 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5467 (__v8si)
5468 _mm256_undefined_si256 (),
5469 (__mmask8) -1);
5472 extern __inline void
5473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5476 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5479 extern __inline __m256i
5480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5481 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5483 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5484 (__v8si) __O, __M);
5487 extern __inline __m256i
5488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5489 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5491 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5492 (__v8si)
5493 _mm256_setzero_si256 (),
5494 __M);
5497 extern __inline __m128i
5498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5499 _mm512_cvtepi64_epi16 (__m512i __A)
5501 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5502 (__v8hi)
5503 _mm_undefined_si128 (),
5504 (__mmask8) -1);
5507 extern __inline void
5508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5511 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5514 extern __inline __m128i
5515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5516 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5518 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5519 (__v8hi) __O, __M);
5522 extern __inline __m128i
5523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5526 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5527 (__v8hi)
5528 _mm_setzero_si128 (),
5529 __M);
5532 extern __inline __m128i
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_cvtsepi64_epi16 (__m512i __A)
5536 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5537 (__v8hi)
5538 _mm_undefined_si128 (),
5539 (__mmask8) -1);
5542 extern __inline void
5543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5544 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5546 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5549 extern __inline __m128i
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5553 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5554 (__v8hi) __O, __M);
5557 extern __inline __m128i
5558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5559 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5561 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5562 (__v8hi)
5563 _mm_setzero_si128 (),
5564 __M);
5567 extern __inline __m128i
5568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569 _mm512_cvtusepi64_epi16 (__m512i __A)
5571 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5572 (__v8hi)
5573 _mm_undefined_si128 (),
5574 (__mmask8) -1);
5577 extern __inline void
5578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5579 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5581 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5584 extern __inline __m128i
5585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5588 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5589 (__v8hi) __O, __M);
5592 extern __inline __m128i
5593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5596 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5597 (__v8hi)
5598 _mm_setzero_si128 (),
5599 __M);
5602 extern __inline __m128i
5603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5604 _mm512_cvtepi64_epi8 (__m512i __A)
5606 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5607 (__v16qi)
5608 _mm_undefined_si128 (),
5609 (__mmask8) -1);
5612 extern __inline void
5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5616 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5619 extern __inline __m128i
5620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5621 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5623 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5624 (__v16qi) __O, __M);
5627 extern __inline __m128i
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5631 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5632 (__v16qi)
5633 _mm_setzero_si128 (),
5634 __M);
5637 extern __inline __m128i
5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639 _mm512_cvtsepi64_epi8 (__m512i __A)
5641 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5642 (__v16qi)
5643 _mm_undefined_si128 (),
5644 (__mmask8) -1);
5647 extern __inline void
5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5649 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5651 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5654 extern __inline __m128i
5655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5658 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5659 (__v16qi) __O, __M);
5662 extern __inline __m128i
5663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5666 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5667 (__v16qi)
5668 _mm_setzero_si128 (),
5669 __M);
5672 extern __inline __m128i
5673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5674 _mm512_cvtusepi64_epi8 (__m512i __A)
5676 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5677 (__v16qi)
5678 _mm_undefined_si128 (),
5679 (__mmask8) -1);
5682 extern __inline void
5683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5684 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5686 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5689 extern __inline __m128i
5690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5693 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5694 (__v16qi) __O,
5695 __M);
5698 extern __inline __m128i
5699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5700 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5702 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5703 (__v16qi)
5704 _mm_setzero_si128 (),
5705 __M);
5708 extern __inline __m512d
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm512_cvtepi32_pd (__m256i __A)
5712 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5713 (__v8df)
5714 _mm512_undefined_pd (),
5715 (__mmask8) -1);
5718 extern __inline __m512d
5719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5720 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5722 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5723 (__v8df) __W,
5724 (__mmask8) __U);
5727 extern __inline __m512d
5728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5731 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5732 (__v8df)
5733 _mm512_setzero_pd (),
5734 (__mmask8) __U);
5737 extern __inline __m512d
5738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739 _mm512_cvtepu32_pd (__m256i __A)
5741 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5742 (__v8df)
5743 _mm512_undefined_pd (),
5744 (__mmask8) -1);
5747 extern __inline __m512d
5748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5751 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5752 (__v8df) __W,
5753 (__mmask8) __U);
5756 extern __inline __m512d
5757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5760 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5761 (__v8df)
5762 _mm512_setzero_pd (),
5763 (__mmask8) __U);
/* (Un)signed 32-bit integer -> float conversions with an explicit
   rounding-mode operand __R (cvtdq2ps512 / cvtudq2ps512 builtins).  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}
#else
#define _mm512_cvt_roundepi32_ps(A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundepu32_ps(A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#endif
5847 #ifdef __OPTIMIZE__
5848 extern __inline __m256d
5849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5850 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5852 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5853 __imm,
5854 (__v4df)
5855 _mm256_undefined_pd (),
5856 (__mmask8) -1);
5859 extern __inline __m256d
5860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5861 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5862 const int __imm)
5864 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5865 __imm,
5866 (__v4df) __W,
5867 (__mmask8) __U);
5870 extern __inline __m256d
5871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5872 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5874 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5875 __imm,
5876 (__v4df)
5877 _mm256_setzero_pd (),
5878 (__mmask8) __U);
5881 extern __inline __m128
5882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5885 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5886 __imm,
5887 (__v4sf)
5888 _mm_undefined_ps (),
5889 (__mmask8) -1);
5892 extern __inline __m128
5893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5894 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5895 const int __imm)
5897 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5898 __imm,
5899 (__v4sf) __W,
5900 (__mmask8) __U);
5903 extern __inline __m128
5904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5907 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5908 __imm,
5909 (__v4sf)
5910 _mm_setzero_ps (),
5911 (__mmask8) __U);
5914 extern __inline __m256i
5915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5916 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5918 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5919 __imm,
5920 (__v4di)
5921 _mm256_undefined_si256 (),
5922 (__mmask8) -1);
5925 extern __inline __m256i
5926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5927 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5928 const int __imm)
5930 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5931 __imm,
5932 (__v4di) __W,
5933 (__mmask8) __U);
5936 extern __inline __m256i
5937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5938 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5940 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5941 __imm,
5942 (__v4di)
5943 _mm256_setzero_si256 (),
5944 (__mmask8) __U);
5947 extern __inline __m128i
5948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5951 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5952 __imm,
5953 (__v4si)
5954 _mm_undefined_si128 (),
5955 (__mmask8) -1);
5958 extern __inline __m128i
5959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5960 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5961 const int __imm)
5963 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5964 __imm,
5965 (__v4si) __W,
5966 (__mmask8) __U);
5969 extern __inline __m128i
5970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5971 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5973 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5974 __imm,
5975 (__v4si)
5976 _mm_setzero_si128 (),
5977 (__mmask8) __U);
5979 #else
5981 #define _mm512_extractf64x4_pd(X, C) \
5982 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5983 (int) (C),\
5984 (__v4df)(__m256d)_mm256_undefined_pd(),\
5985 (__mmask8)-1))
5987 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5988 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5989 (int) (C),\
5990 (__v4df)(__m256d)(W),\
5991 (__mmask8)(U)))
5993 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5994 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5995 (int) (C),\
5996 (__v4df)(__m256d)_mm256_setzero_pd(),\
5997 (__mmask8)(U)))
5999 #define _mm512_extractf32x4_ps(X, C) \
6000 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
6001 (int) (C),\
6002 (__v4sf)(__m128)_mm_undefined_ps(),\
6003 (__mmask8)-1))
6005 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
6006 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
6007 (int) (C),\
6008 (__v4sf)(__m128)(W),\
6009 (__mmask8)(U)))
6011 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
6012 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
6013 (int) (C),\
6014 (__v4sf)(__m128)_mm_setzero_ps(),\
6015 (__mmask8)(U)))
6017 #define _mm512_extracti64x4_epi64(X, C) \
6018 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
6019 (int) (C),\
6020 (__v4di)(__m256i)_mm256_undefined_si256 (),\
6021 (__mmask8)-1))
6023 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
6024 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
6025 (int) (C),\
6026 (__v4di)(__m256i)(W),\
6027 (__mmask8)(U)))
6029 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
6030 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
6031 (int) (C),\
6032 (__v4di)(__m256i)_mm256_setzero_si256 (),\
6033 (__mmask8)(U)))
6035 #define _mm512_extracti32x4_epi32(X, C) \
6036 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
6037 (int) (C),\
6038 (__v4si)(__m128i)_mm_undefined_si128 (),\
6039 (__mmask8)-1))
6041 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
6042 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
6043 (int) (C),\
6044 (__v4si)(__m128i)(W),\
6045 (__mmask8)(U)))
6047 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
6048 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
6049 (int) (C),\
6050 (__v4si)(__m128i)_mm_setzero_si128 (),\
6051 (__mmask8)(U)))
6052 #endif
6054 #ifdef __OPTIMIZE__
6055 extern __inline __m512i
6056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
6059 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
6060 (__v4si) __B,
6061 __imm,
6062 (__v16si) __A, -1);
6065 extern __inline __m512
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
6069 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
6070 (__v4sf) __B,
6071 __imm,
6072 (__v16sf) __A, -1);
6075 extern __inline __m512i
6076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6079 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6080 (__v4di) __B,
6081 __imm,
6082 (__v8di)
6083 _mm512_undefined_epi32 (),
6084 (__mmask8) -1);
6087 extern __inline __m512i
6088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6090 __m256i __B, const int __imm)
6092 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6093 (__v4di) __B,
6094 __imm,
6095 (__v8di) __W,
6096 (__mmask8) __U);
6099 extern __inline __m512i
6100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6102 const int __imm)
6104 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6105 (__v4di) __B,
6106 __imm,
6107 (__v8di)
6108 _mm512_setzero_si512 (),
6109 (__mmask8) __U);
6112 extern __inline __m512d
6113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6114 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6116 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6117 (__v4df) __B,
6118 __imm,
6119 (__v8df)
6120 _mm512_undefined_pd (),
6121 (__mmask8) -1);
6124 extern __inline __m512d
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6127 __m256d __B, const int __imm)
6129 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6130 (__v4df) __B,
6131 __imm,
6132 (__v8df) __W,
6133 (__mmask8) __U);
6136 extern __inline __m512d
6137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6138 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6139 const int __imm)
6141 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6142 (__v4df) __B,
6143 __imm,
6144 (__v8df)
6145 _mm512_setzero_pd (),
6146 (__mmask8) __U);
6148 #else
6149 #define _mm512_insertf32x4(X, Y, C) \
6150 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
6151 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
6153 #define _mm512_inserti32x4(X, Y, C) \
6154 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
6155 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
6157 #define _mm512_insertf64x4(X, Y, C) \
6158 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
6159 (__v4df)(__m256d) (Y), (int) (C), \
6160 (__v8df)(__m512d)_mm512_undefined_pd(), \
6161 (__mmask8)-1))
6163 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
6164 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
6165 (__v4df)(__m256d) (Y), (int) (C), \
6166 (__v8df)(__m512d)(W), \
6167 (__mmask8)(U)))
6169 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
6170 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
6171 (__v4df)(__m256d) (Y), (int) (C), \
6172 (__v8df)(__m512d)_mm512_setzero_pd(), \
6173 (__mmask8)(U)))
6175 #define _mm512_inserti64x4(X, Y, C) \
6176 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6177 (__v4di)(__m256i) (Y), (int) (C), \
6178 (__v8di)(__m512i)_mm512_undefined_epi32 (), \
6179 (__mmask8)-1))
6181 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
6182 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6183 (__v4di)(__m256i) (Y), (int) (C),\
6184 (__v8di)(__m512i)(W),\
6185 (__mmask8)(U)))
6187 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
6188 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6189 (__v4di)(__m256i) (Y), (int) (C), \
6190 (__v8di)(__m512i)_mm512_setzero_si512 (), \
6191 (__mmask8)(U)))
6192 #endif
6194 extern __inline __m512d
6195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196 _mm512_loadu_pd (void const *__P)
6198 return *(__m512d_u *)__P;
6201 extern __inline __m512d
6202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6205 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6206 (__v8df) __W,
6207 (__mmask8) __U);
6210 extern __inline __m512d
6211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6212 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6214 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6215 (__v8df)
6216 _mm512_setzero_pd (),
6217 (__mmask8) __U);
6220 extern __inline void
6221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6222 _mm512_storeu_pd (void *__P, __m512d __A)
6224 *(__m512d_u *)__P = __A;
6227 extern __inline void
6228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6229 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6231 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
6232 (__mmask8) __U);
6235 extern __inline __m512
6236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237 _mm512_loadu_ps (void const *__P)
6239 return *(__m512_u *)__P;
6242 extern __inline __m512
6243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6246 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6247 (__v16sf) __W,
6248 (__mmask16) __U);
6251 extern __inline __m512
6252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6255 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6256 (__v16sf)
6257 _mm512_setzero_ps (),
6258 (__mmask16) __U);
6261 extern __inline void
6262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263 _mm512_storeu_ps (void *__P, __m512 __A)
6265 *(__m512_u *)__P = __A;
6268 extern __inline void
6269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6272 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
6273 (__mmask16) __U);
6276 extern __inline __m128
6277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6278 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
6280 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
6283 extern __inline __m128
6284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6285 _mm_maskz_load_ss (__mmask8 __U, const float *__P)
6287 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
6288 __U);
6291 extern __inline __m128d
6292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6293 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
6295 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
6298 extern __inline __m128d
6299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6300 _mm_maskz_load_sd (__mmask8 __U, const double *__P)
6302 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
6303 __U);
6306 extern __inline __m128
6307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6308 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6310 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6311 (__v4sf) __W, __U);
6314 extern __inline __m128
6315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6316 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
6318 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
6319 (__v4sf) _mm_setzero_ps (), __U);
6322 extern __inline __m128d
6323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6324 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6326 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6327 (__v2df) __W, __U);
6330 extern __inline __m128d
6331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6332 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
6334 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
6335 (__v2df) _mm_setzero_pd (),
6336 __U);
6339 extern __inline void
6340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341 _mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
6343 __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
6346 extern __inline void
6347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6348 _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
6350 __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
6353 extern __inline __m512i
6354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6355 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6357 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6358 (__v8di) __W,
6359 (__mmask8) __U);
6362 extern __inline __m512i
6363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6366 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6367 (__v8di)
6368 _mm512_setzero_si512 (),
6369 (__mmask8) __U);
6372 extern __inline void
6373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6374 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6376 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
6377 (__mmask8) __U);
6380 extern __inline __m512i
6381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6382 _mm512_loadu_si512 (void const *__P)
6384 return *(__m512i_u *)__P;
6387 extern __inline __m512i
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6391 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6392 (__v16si) __W,
6393 (__mmask16) __U);
6396 extern __inline __m512i
6397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6398 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6400 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6401 (__v16si)
6402 _mm512_setzero_si512 (),
6403 (__mmask16) __U);
6406 extern __inline void
6407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6408 _mm512_storeu_si512 (void *__P, __m512i __A)
6410 *(__m512i_u *)__P = __A;
6413 extern __inline void
6414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6415 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6417 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
6418 (__mmask16) __U);
6421 extern __inline __m512d
6422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423 _mm512_permutevar_pd (__m512d __A, __m512i __C)
6425 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6426 (__v8di) __C,
6427 (__v8df)
6428 _mm512_undefined_pd (),
6429 (__mmask8) -1);
6432 extern __inline __m512d
6433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6436 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6437 (__v8di) __C,
6438 (__v8df) __W,
6439 (__mmask8) __U);
6442 extern __inline __m512d
6443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6444 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6446 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6447 (__v8di) __C,
6448 (__v8df)
6449 _mm512_setzero_pd (),
6450 (__mmask8) __U);
6453 extern __inline __m512
6454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6455 _mm512_permutevar_ps (__m512 __A, __m512i __C)
6457 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6458 (__v16si) __C,
6459 (__v16sf)
6460 _mm512_undefined_ps (),
6461 (__mmask16) -1);
6464 extern __inline __m512
6465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6466 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6468 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6469 (__v16si) __C,
6470 (__v16sf) __W,
6471 (__mmask16) __U);
6474 extern __inline __m512
6475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6476 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6478 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6479 (__v16si) __C,
6480 (__v16sf)
6481 _mm512_setzero_ps (),
6482 (__mmask16) __U);
6485 extern __inline __m512i
6486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6487 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6489 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6490 /* idx */ ,
6491 (__v8di) __A,
6492 (__v8di) __B,
6493 (__mmask8) -1);
6496 extern __inline __m512i
6497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6498 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6499 __m512i __B)
6501 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6502 /* idx */ ,
6503 (__v8di) __A,
6504 (__v8di) __B,
6505 (__mmask8) __U);
6508 extern __inline __m512i
6509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6510 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6511 __mmask8 __U, __m512i __B)
6513 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6514 (__v8di) __I
6515 /* idx */ ,
6516 (__v8di) __B,
6517 (__mmask8) __U);
6520 extern __inline __m512i
6521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6522 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6523 __m512i __I, __m512i __B)
6525 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6526 /* idx */ ,
6527 (__v8di) __A,
6528 (__v8di) __B,
6529 (__mmask8) __U);
6532 extern __inline __m512i
6533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6534 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6536 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6537 /* idx */ ,
6538 (__v16si) __A,
6539 (__v16si) __B,
6540 (__mmask16) -1);
6543 extern __inline __m512i
6544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6545 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6546 __m512i __I, __m512i __B)
6548 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6549 /* idx */ ,
6550 (__v16si) __A,
6551 (__v16si) __B,
6552 (__mmask16) __U);
6555 extern __inline __m512i
6556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6557 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6558 __mmask16 __U, __m512i __B)
6560 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6561 (__v16si) __I
6562 /* idx */ ,
6563 (__v16si) __B,
6564 (__mmask16) __U);
6567 extern __inline __m512i
6568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6569 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6570 __m512i __I, __m512i __B)
6572 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6573 /* idx */ ,
6574 (__v16si) __A,
6575 (__v16si) __B,
6576 (__mmask16) __U);
6579 extern __inline __m512d
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6583 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6584 /* idx */ ,
6585 (__v8df) __A,
6586 (__v8df) __B,
6587 (__mmask8) -1);
6590 extern __inline __m512d
6591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6592 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6593 __m512d __B)
6595 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6596 /* idx */ ,
6597 (__v8df) __A,
6598 (__v8df) __B,
6599 (__mmask8) __U);
6602 extern __inline __m512d
6603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6604 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6605 __m512d __B)
6607 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6608 (__v8di) __I
6609 /* idx */ ,
6610 (__v8df) __B,
6611 (__mmask8) __U);
6614 extern __inline __m512d
6615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6616 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6617 __m512d __B)
6619 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6620 /* idx */ ,
6621 (__v8df) __A,
6622 (__v8df) __B,
6623 (__mmask8) __U);
6626 extern __inline __m512
6627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6628 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6630 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6631 /* idx */ ,
6632 (__v16sf) __A,
6633 (__v16sf) __B,
6634 (__mmask16) -1);
6637 extern __inline __m512
6638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6641 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6642 /* idx */ ,
6643 (__v16sf) __A,
6644 (__v16sf) __B,
6645 (__mmask16) __U);
6648 extern __inline __m512
6649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6650 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6651 __m512 __B)
6653 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6654 (__v16si) __I
6655 /* idx */ ,
6656 (__v16sf) __B,
6657 (__mmask16) __U);
6660 extern __inline __m512
6661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6662 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6663 __m512 __B)
6665 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6666 /* idx */ ,
6667 (__v16sf) __A,
6668 (__v16sf) __B,
6669 (__mmask16) __U);
6672 #ifdef __OPTIMIZE__
6673 extern __inline __m512d
6674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6675 _mm512_permute_pd (__m512d __X, const int __C)
6677 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6678 (__v8df)
6679 _mm512_undefined_pd (),
6680 (__mmask8) -1);
6683 extern __inline __m512d
6684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6685 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6687 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6688 (__v8df) __W,
6689 (__mmask8) __U);
6692 extern __inline __m512d
6693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6694 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6696 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6697 (__v8df)
6698 _mm512_setzero_pd (),
6699 (__mmask8) __U);
6702 extern __inline __m512
6703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6704 _mm512_permute_ps (__m512 __X, const int __C)
6706 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6707 (__v16sf)
6708 _mm512_undefined_ps (),
6709 (__mmask16) -1);
6712 extern __inline __m512
6713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6714 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6716 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6717 (__v16sf) __W,
6718 (__mmask16) __U);
6721 extern __inline __m512
6722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6723 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6725 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6726 (__v16sf)
6727 _mm512_setzero_ps (),
6728 (__mmask16) __U);
6730 #else
6731 #define _mm512_permute_pd(X, C) \
6732 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6733 (__v8df)(__m512d)_mm512_undefined_pd(),\
6734 (__mmask8)(-1)))
6736 #define _mm512_mask_permute_pd(W, U, X, C) \
6737 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6738 (__v8df)(__m512d)(W), \
6739 (__mmask8)(U)))
6741 #define _mm512_maskz_permute_pd(U, X, C) \
6742 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6743 (__v8df)(__m512d)_mm512_setzero_pd(), \
6744 (__mmask8)(U)))
6746 #define _mm512_permute_ps(X, C) \
6747 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6748 (__v16sf)(__m512)_mm512_undefined_ps(),\
6749 (__mmask16)(-1)))
6751 #define _mm512_mask_permute_ps(W, U, X, C) \
6752 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6753 (__v16sf)(__m512)(W), \
6754 (__mmask16)(U)))
6756 #define _mm512_maskz_permute_ps(U, X, C) \
6757 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6758 (__v16sf)(__m512)_mm512_setzero_ps(), \
6759 (__mmask16)(U)))
6760 #endif
6762 #ifdef __OPTIMIZE__
6763 extern __inline __m512i
6764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6765 _mm512_permutex_epi64 (__m512i __X, const int __I)
6767 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6768 (__v8di)
6769 _mm512_undefined_epi32 (),
6770 (__mmask8) (-1));
6773 extern __inline __m512i
6774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6775 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6776 __m512i __X, const int __I)
6778 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6779 (__v8di) __W,
6780 (__mmask8) __M);
6783 extern __inline __m512i
6784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6785 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6787 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6788 (__v8di)
6789 _mm512_setzero_si512 (),
6790 (__mmask8) __M);
6793 extern __inline __m512d
6794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6795 _mm512_permutex_pd (__m512d __X, const int __M)
6797 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6798 (__v8df)
6799 _mm512_undefined_pd (),
6800 (__mmask8) -1);
6803 extern __inline __m512d
6804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6805 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6807 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6808 (__v8df) __W,
6809 (__mmask8) __U);
6812 extern __inline __m512d
6813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6814 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6816 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6817 (__v8df)
6818 _mm512_setzero_pd (),
6819 (__mmask8) __U);
6821 #else
6822 #define _mm512_permutex_pd(X, M) \
6823 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6824 (__v8df)(__m512d)_mm512_undefined_pd(),\
6825 (__mmask8)-1))
6827 #define _mm512_mask_permutex_pd(W, U, X, M) \
6828 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6829 (__v8df)(__m512d)(W), (__mmask8)(U)))
6831 #define _mm512_maskz_permutex_pd(U, X, M) \
6832 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6833 (__v8df)(__m512d)_mm512_setzero_pd(),\
6834 (__mmask8)(U)))
6836 #define _mm512_permutex_epi64(X, I) \
6837 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6838 (int)(I), \
6839 (__v8di)(__m512i) \
6840 (_mm512_undefined_epi32 ()),\
6841 (__mmask8)(-1)))
6843 #define _mm512_maskz_permutex_epi64(M, X, I) \
6844 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6845 (int)(I), \
6846 (__v8di)(__m512i) \
6847 (_mm512_setzero_si512 ()),\
6848 (__mmask8)(M)))
6850 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6851 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6852 (int)(I), \
6853 (__v8di)(__m512i)(W), \
6854 (__mmask8)(M)))
6855 #endif
6857 extern __inline __m512i
6858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6859 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6861 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6862 (__v8di) __X,
6863 (__v8di)
6864 _mm512_setzero_si512 (),
6865 __M);
6868 extern __inline __m512i
6869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6870 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6872 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6873 (__v8di) __X,
6874 (__v8di)
6875 _mm512_undefined_epi32 (),
6876 (__mmask8) -1);
6879 extern __inline __m512i
6880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6881 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6882 __m512i __Y)
6884 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6885 (__v8di) __X,
6886 (__v8di) __W,
6887 __M);
6890 extern __inline __m512i
6891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6892 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6894 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6895 (__v16si) __X,
6896 (__v16si)
6897 _mm512_setzero_si512 (),
6898 __M);
6901 extern __inline __m512i
6902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6903 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6905 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6906 (__v16si) __X,
6907 (__v16si)
6908 _mm512_undefined_epi32 (),
6909 (__mmask16) -1);
6912 extern __inline __m512i
6913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6914 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6915 __m512i __Y)
6917 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6918 (__v16si) __X,
6919 (__v16si) __W,
6920 __M);
6923 extern __inline __m512d
6924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6925 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6927 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6928 (__v8di) __X,
6929 (__v8df)
6930 _mm512_undefined_pd (),
6931 (__mmask8) -1);
6934 extern __inline __m512d
6935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6936 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6938 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6939 (__v8di) __X,
6940 (__v8df) __W,
6941 (__mmask8) __U);
6944 extern __inline __m512d
6945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6946 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6948 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6949 (__v8di) __X,
6950 (__v8df)
6951 _mm512_setzero_pd (),
6952 (__mmask8) __U);
6955 extern __inline __m512
6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6959 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6960 (__v16si) __X,
6961 (__v16sf)
6962 _mm512_undefined_ps (),
6963 (__mmask16) -1);
6966 extern __inline __m512
6967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6968 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6970 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6971 (__v16si) __X,
6972 (__v16sf) __W,
6973 (__mmask16) __U);
6976 extern __inline __m512
6977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6978 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6980 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6981 (__v16si) __X,
6982 (__v16sf)
6983 _mm512_setzero_ps (),
6984 (__mmask16) __U);
6987 #ifdef __OPTIMIZE__
6988 extern __inline __m512
6989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6990 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6992 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6993 (__v16sf) __V, __imm,
6994 (__v16sf)
6995 _mm512_undefined_ps (),
6996 (__mmask16) -1);
6999 extern __inline __m512
7000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7001 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
7002 __m512 __V, const int __imm)
7004 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7005 (__v16sf) __V, __imm,
7006 (__v16sf) __W,
7007 (__mmask16) __U);
7010 extern __inline __m512
7011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7012 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
7014 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
7015 (__v16sf) __V, __imm,
7016 (__v16sf)
7017 _mm512_setzero_ps (),
7018 (__mmask16) __U);
7021 extern __inline __m512d
7022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7023 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
7025 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7026 (__v8df) __V, __imm,
7027 (__v8df)
7028 _mm512_undefined_pd (),
7029 (__mmask8) -1);
7032 extern __inline __m512d
7033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7034 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
7035 __m512d __V, const int __imm)
7037 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7038 (__v8df) __V, __imm,
7039 (__v8df) __W,
7040 (__mmask8) __U);
7043 extern __inline __m512d
7044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7045 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
7046 const int __imm)
7048 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
7049 (__v8df) __V, __imm,
7050 (__v8df)
7051 _mm512_setzero_pd (),
7052 (__mmask8) __U);
7055 extern __inline __m512d
7056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7057 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
7058 const int __imm, const int __R)
7060 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
7061 (__v8df) __B,
7062 (__v8di) __C,
7063 __imm,
7064 (__mmask8) -1, __R);
7067 extern __inline __m512d
7068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7069 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
7070 __m512i __C, const int __imm, const int __R)
7072 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
7073 (__v8df) __B,
7074 (__v8di) __C,
7075 __imm,
7076 (__mmask8) __U, __R);
7079 extern __inline __m512d
7080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7081 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
7082 __m512i __C, const int __imm, const int __R)
7084 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
7085 (__v8df) __B,
7086 (__v8di) __C,
7087 __imm,
7088 (__mmask8) __U, __R);
7091 extern __inline __m512
7092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7093 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
7094 const int __imm, const int __R)
7096 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7097 (__v16sf) __B,
7098 (__v16si) __C,
7099 __imm,
7100 (__mmask16) -1, __R);
7103 extern __inline __m512
7104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7105 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7106 __m512i __C, const int __imm, const int __R)
7108 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7109 (__v16sf) __B,
7110 (__v16si) __C,
7111 __imm,
7112 (__mmask16) __U, __R);
7115 extern __inline __m512
7116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7117 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7118 __m512i __C, const int __imm, const int __R)
7120 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
7121 (__v16sf) __B,
7122 (__v16si) __C,
7123 __imm,
7124 (__mmask16) __U, __R);
7127 extern __inline __m128d
7128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7129 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
7130 const int __imm, const int __R)
7132 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7133 (__v2df) __B,
7134 (__v2di) __C, __imm,
7135 (__mmask8) -1, __R);
7138 extern __inline __m128d
7139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7140 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
7141 __m128i __C, const int __imm, const int __R)
7143 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7144 (__v2df) __B,
7145 (__v2di) __C, __imm,
7146 (__mmask8) __U, __R);
7149 extern __inline __m128d
7150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7151 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7152 __m128i __C, const int __imm, const int __R)
7154 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
7155 (__v2df) __B,
7156 (__v2di) __C,
7157 __imm,
7158 (__mmask8) __U, __R);
7161 extern __inline __m128
7162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7163 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
7164 const int __imm, const int __R)
7166 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7167 (__v4sf) __B,
7168 (__v4si) __C, __imm,
7169 (__mmask8) -1, __R);
7172 extern __inline __m128
7173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7174 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7175 __m128i __C, const int __imm, const int __R)
7177 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7178 (__v4sf) __B,
7179 (__v4si) __C, __imm,
7180 (__mmask8) __U, __R);
7183 extern __inline __m128
7184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7185 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7186 __m128i __C, const int __imm, const int __R)
7188 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
7189 (__v4sf) __B,
7190 (__v4si) __C, __imm,
7191 (__mmask8) __U, __R);
7194 #else
7195 #define _mm512_shuffle_pd(X, Y, C) \
7196 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7197 (__v8df)(__m512d)(Y), (int)(C),\
7198 (__v8df)(__m512d)_mm512_undefined_pd(),\
7199 (__mmask8)-1))
7201 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
7202 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7203 (__v8df)(__m512d)(Y), (int)(C),\
7204 (__v8df)(__m512d)(W),\
7205 (__mmask8)(U)))
7207 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
7208 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7209 (__v8df)(__m512d)(Y), (int)(C),\
7210 (__v8df)(__m512d)_mm512_setzero_pd(),\
7211 (__mmask8)(U)))
7213 #define _mm512_shuffle_ps(X, Y, C) \
7214 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7215 (__v16sf)(__m512)(Y), (int)(C),\
7216 (__v16sf)(__m512)_mm512_undefined_ps(),\
7217 (__mmask16)-1))
7219 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
7220 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7221 (__v16sf)(__m512)(Y), (int)(C),\
7222 (__v16sf)(__m512)(W),\
7223 (__mmask16)(U)))
7225 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
7226 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7227 (__v16sf)(__m512)(Y), (int)(C),\
7228 (__v16sf)(__m512)_mm512_setzero_ps(),\
7229 (__mmask16)(U)))
7231 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
7232 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
7233 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7234 (__mmask8)(-1), (R)))
7236 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
7237 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
7238 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7239 (__mmask8)(U), (R)))
7241 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
7242 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
7243 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7244 (__mmask8)(U), (R)))
7246 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
7247 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
7248 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7249 (__mmask16)(-1), (R)))
7251 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
7252 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
7253 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7254 (__mmask16)(U), (R)))
7256 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
7257 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
7258 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7259 (__mmask16)(U), (R)))
7261 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
7262 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7263 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7264 (__mmask8)(-1), (R)))
7266 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
7267 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7268 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7269 (__mmask8)(U), (R)))
7271 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
7272 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
7273 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7274 (__mmask8)(U), (R)))
7276 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
7277 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7278 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7279 (__mmask8)(-1), (R)))
7281 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
7282 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7283 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7284 (__mmask8)(U), (R)))
7286 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
7287 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
7288 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7289 (__mmask8)(U), (R)))
7290 #endif
7292 extern __inline __m512
7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294 _mm512_movehdup_ps (__m512 __A)
7296 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7297 (__v16sf)
7298 _mm512_undefined_ps (),
7299 (__mmask16) -1);
7302 extern __inline __m512
7303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7304 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7306 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7307 (__v16sf) __W,
7308 (__mmask16) __U);
7311 extern __inline __m512
7312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7315 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7316 (__v16sf)
7317 _mm512_setzero_ps (),
7318 (__mmask16) __U);
7321 extern __inline __m512
7322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7323 _mm512_moveldup_ps (__m512 __A)
7325 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7326 (__v16sf)
7327 _mm512_undefined_ps (),
7328 (__mmask16) -1);
7331 extern __inline __m512
7332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7333 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7335 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7336 (__v16sf) __W,
7337 (__mmask16) __U);
7340 extern __inline __m512
7341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7342 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7344 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7345 (__v16sf)
7346 _mm512_setzero_ps (),
7347 (__mmask16) __U);
7350 extern __inline __m512i
7351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7352 _mm512_or_si512 (__m512i __A, __m512i __B)
7354 return (__m512i) ((__v16su) __A | (__v16su) __B);
7357 extern __inline __m512i
7358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7359 _mm512_or_epi32 (__m512i __A, __m512i __B)
7361 return (__m512i) ((__v16su) __A | (__v16su) __B);
7364 extern __inline __m512i
7365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7366 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7368 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7369 (__v16si) __B,
7370 (__v16si) __W,
7371 (__mmask16) __U);
7374 extern __inline __m512i
7375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7376 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7378 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7379 (__v16si) __B,
7380 (__v16si)
7381 _mm512_setzero_si512 (),
7382 (__mmask16) __U);
7385 extern __inline __m512i
7386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7387 _mm512_or_epi64 (__m512i __A, __m512i __B)
7389 return (__m512i) ((__v8du) __A | (__v8du) __B);
7392 extern __inline __m512i
7393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7394 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7396 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7397 (__v8di) __B,
7398 (__v8di) __W,
7399 (__mmask8) __U);
7402 extern __inline __m512i
7403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7404 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7406 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7407 (__v8di) __B,
7408 (__v8di)
7409 _mm512_setzero_si512 (),
7410 (__mmask8) __U);
7413 extern __inline __m512i
7414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7415 _mm512_xor_si512 (__m512i __A, __m512i __B)
7417 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7420 extern __inline __m512i
7421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7422 _mm512_xor_epi32 (__m512i __A, __m512i __B)
7424 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7427 extern __inline __m512i
7428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7429 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7431 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7432 (__v16si) __B,
7433 (__v16si) __W,
7434 (__mmask16) __U);
7437 extern __inline __m512i
7438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7439 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7441 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7442 (__v16si) __B,
7443 (__v16si)
7444 _mm512_setzero_si512 (),
7445 (__mmask16) __U);
7448 extern __inline __m512i
7449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450 _mm512_xor_epi64 (__m512i __A, __m512i __B)
7452 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
7455 extern __inline __m512i
7456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7457 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7459 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7460 (__v8di) __B,
7461 (__v8di) __W,
7462 (__mmask8) __U);
7465 extern __inline __m512i
7466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7467 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7469 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7470 (__v8di) __B,
7471 (__v8di)
7472 _mm512_setzero_si512 (),
7473 (__mmask8) __U);
7476 #ifdef __OPTIMIZE__
7477 extern __inline __m512i
7478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7479 _mm512_rol_epi32 (__m512i __A, const int __B)
7481 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7482 (__v16si)
7483 _mm512_undefined_epi32 (),
7484 (__mmask16) -1);
7487 extern __inline __m512i
7488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7491 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7492 (__v16si) __W,
7493 (__mmask16) __U);
7496 extern __inline __m512i
7497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7498 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7500 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7501 (__v16si)
7502 _mm512_setzero_si512 (),
7503 (__mmask16) __U);
7506 extern __inline __m512i
7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508 _mm512_ror_epi32 (__m512i __A, int __B)
7510 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7511 (__v16si)
7512 _mm512_undefined_epi32 (),
7513 (__mmask16) -1);
7516 extern __inline __m512i
7517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7518 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7520 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7521 (__v16si) __W,
7522 (__mmask16) __U);
7525 extern __inline __m512i
7526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7527 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7529 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7530 (__v16si)
7531 _mm512_setzero_si512 (),
7532 (__mmask16) __U);
7535 extern __inline __m512i
7536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7537 _mm512_rol_epi64 (__m512i __A, const int __B)
7539 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7540 (__v8di)
7541 _mm512_undefined_epi32 (),
7542 (__mmask8) -1);
7545 extern __inline __m512i
7546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7547 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7549 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7550 (__v8di) __W,
7551 (__mmask8) __U);
7554 extern __inline __m512i
7555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7556 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7558 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7559 (__v8di)
7560 _mm512_setzero_si512 (),
7561 (__mmask8) __U);
7564 extern __inline __m512i
7565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7566 _mm512_ror_epi64 (__m512i __A, int __B)
7568 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7569 (__v8di)
7570 _mm512_undefined_epi32 (),
7571 (__mmask8) -1);
7574 extern __inline __m512i
7575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7578 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7579 (__v8di) __W,
7580 (__mmask8) __U);
7583 extern __inline __m512i
7584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7585 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7587 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7588 (__v8di)
7589 _mm512_setzero_si512 (),
7590 (__mmask8) __U);
7593 #else
7594 #define _mm512_rol_epi32(A, B) \
7595 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7596 (int)(B), \
7597 (__v16si)_mm512_undefined_epi32 (), \
7598 (__mmask16)(-1)))
7599 #define _mm512_mask_rol_epi32(W, U, A, B) \
7600 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7601 (int)(B), \
7602 (__v16si)(__m512i)(W), \
7603 (__mmask16)(U)))
7604 #define _mm512_maskz_rol_epi32(U, A, B) \
7605 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7606 (int)(B), \
7607 (__v16si)_mm512_setzero_si512 (), \
7608 (__mmask16)(U)))
7609 #define _mm512_ror_epi32(A, B) \
7610 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7611 (int)(B), \
7612 (__v16si)_mm512_undefined_epi32 (), \
7613 (__mmask16)(-1)))
7614 #define _mm512_mask_ror_epi32(W, U, A, B) \
7615 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7616 (int)(B), \
7617 (__v16si)(__m512i)(W), \
7618 (__mmask16)(U)))
7619 #define _mm512_maskz_ror_epi32(U, A, B) \
7620 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7621 (int)(B), \
7622 (__v16si)_mm512_setzero_si512 (), \
7623 (__mmask16)(U)))
7624 #define _mm512_rol_epi64(A, B) \
7625 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7626 (int)(B), \
7627 (__v8di)_mm512_undefined_epi32 (), \
7628 (__mmask8)(-1)))
7629 #define _mm512_mask_rol_epi64(W, U, A, B) \
7630 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7631 (int)(B), \
7632 (__v8di)(__m512i)(W), \
7633 (__mmask8)(U)))
7634 #define _mm512_maskz_rol_epi64(U, A, B) \
7635 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7636 (int)(B), \
7637 (__v8di)_mm512_setzero_si512 (), \
7638 (__mmask8)(U)))
7640 #define _mm512_ror_epi64(A, B) \
7641 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7642 (int)(B), \
7643 (__v8di)_mm512_undefined_epi32 (), \
7644 (__mmask8)(-1)))
7645 #define _mm512_mask_ror_epi64(W, U, A, B) \
7646 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7647 (int)(B), \
7648 (__v8di)(__m512i)(W), \
7649 (__mmask8)(U)))
7650 #define _mm512_maskz_ror_epi64(U, A, B) \
7651 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7652 (int)(B), \
7653 (__v8di)_mm512_setzero_si512 (), \
7654 (__mmask8)(U)))
7655 #endif
7657 extern __inline __m512i
7658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7659 _mm512_and_si512 (__m512i __A, __m512i __B)
7661 return (__m512i) ((__v16su) __A & (__v16su) __B);
7664 extern __inline __m512i
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm512_and_epi32 (__m512i __A, __m512i __B)
7668 return (__m512i) ((__v16su) __A & (__v16su) __B);
7671 extern __inline __m512i
7672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7673 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7675 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7676 (__v16si) __B,
7677 (__v16si) __W,
7678 (__mmask16) __U);
7681 extern __inline __m512i
7682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7683 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7685 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7686 (__v16si) __B,
7687 (__v16si)
7688 _mm512_setzero_si512 (),
7689 (__mmask16) __U);
7692 extern __inline __m512i
7693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7694 _mm512_and_epi64 (__m512i __A, __m512i __B)
7696 return (__m512i) ((__v8du) __A & (__v8du) __B);
7699 extern __inline __m512i
7700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7701 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7703 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7704 (__v8di) __B,
7705 (__v8di) __W, __U);
7708 extern __inline __m512i
7709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7710 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7712 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7713 (__v8di) __B,
7714 (__v8di)
7715 _mm512_setzero_pd (),
7716 __U);
7719 extern __inline __m512i
7720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7721 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7723 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7724 (__v16si) __B,
7725 (__v16si)
7726 _mm512_undefined_epi32 (),
7727 (__mmask16) -1);
7730 extern __inline __m512i
7731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7732 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7734 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7735 (__v16si) __B,
7736 (__v16si)
7737 _mm512_undefined_epi32 (),
7738 (__mmask16) -1);
7741 extern __inline __m512i
7742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7743 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7745 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7746 (__v16si) __B,
7747 (__v16si) __W,
7748 (__mmask16) __U);
7751 extern __inline __m512i
7752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7753 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7755 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7756 (__v16si) __B,
7757 (__v16si)
7758 _mm512_setzero_si512 (),
7759 (__mmask16) __U);
7762 extern __inline __m512i
7763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7764 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7766 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7767 (__v8di) __B,
7768 (__v8di)
7769 _mm512_undefined_epi32 (),
7770 (__mmask8) -1);
7773 extern __inline __m512i
7774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7775 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7777 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7778 (__v8di) __B,
7779 (__v8di) __W, __U);
7782 extern __inline __m512i
7783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7784 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7786 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7787 (__v8di) __B,
7788 (__v8di)
7789 _mm512_setzero_pd (),
7790 __U);
7793 extern __inline __mmask16
7794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7795 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7797 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7798 (__v16si) __B,
7799 (__mmask16) -1);
7802 extern __inline __mmask16
7803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7804 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7806 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7807 (__v16si) __B, __U);
7810 extern __inline __mmask8
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7814 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7815 (__v8di) __B,
7816 (__mmask8) -1);
7819 extern __inline __mmask8
7820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7821 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7823 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7826 extern __inline __mmask16
7827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7828 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7830 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7831 (__v16si) __B,
7832 (__mmask16) -1);
7835 extern __inline __mmask16
7836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7839 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7840 (__v16si) __B, __U);
7843 extern __inline __mmask8
7844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7845 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7847 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7848 (__v8di) __B,
7849 (__mmask8) -1);
7852 extern __inline __mmask8
7853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7854 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7856 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7857 (__v8di) __B, __U);
7860 extern __inline __m512
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm512_abs_ps (__m512 __A)
7864 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7865 _mm512_set1_epi32 (0x7fffffff));
7868 extern __inline __m512
7869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7872 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7873 _mm512_set1_epi32 (0x7fffffff));
7876 extern __inline __m512d
7877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7878 _mm512_abs_pd (__m512d __A)
7880 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7881 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7884 extern __inline __m512d
7885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7886 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
7888 return (__m512d)
7889 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7890 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7893 extern __inline __m512i
7894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7895 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7897 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7898 (__v16si) __B,
7899 (__v16si)
7900 _mm512_undefined_epi32 (),
7901 (__mmask16) -1);
7904 extern __inline __m512i
7905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7906 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7907 __m512i __B)
7909 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7910 (__v16si) __B,
7911 (__v16si) __W,
7912 (__mmask16) __U);
7915 extern __inline __m512i
7916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7917 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7919 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7920 (__v16si) __B,
7921 (__v16si)
7922 _mm512_setzero_si512 (),
7923 (__mmask16) __U);
7926 extern __inline __m512i
7927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7928 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7930 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7931 (__v8di) __B,
7932 (__v8di)
7933 _mm512_undefined_epi32 (),
7934 (__mmask8) -1);
7937 extern __inline __m512i
7938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7939 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7941 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7942 (__v8di) __B,
7943 (__v8di) __W,
7944 (__mmask8) __U);
7947 extern __inline __m512i
7948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7949 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7951 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7952 (__v8di) __B,
7953 (__v8di)
7954 _mm512_setzero_si512 (),
7955 (__mmask8) __U);
7958 extern __inline __m512i
7959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7960 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7962 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7963 (__v16si) __B,
7964 (__v16si)
7965 _mm512_undefined_epi32 (),
7966 (__mmask16) -1);
7969 extern __inline __m512i
7970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7972 __m512i __B)
7974 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7975 (__v16si) __B,
7976 (__v16si) __W,
7977 (__mmask16) __U);
7980 extern __inline __m512i
7981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7982 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7984 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7985 (__v16si) __B,
7986 (__v16si)
7987 _mm512_setzero_si512 (),
7988 (__mmask16) __U);
7991 extern __inline __m512i
7992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7993 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7995 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7996 (__v8di) __B,
7997 (__v8di)
7998 _mm512_undefined_epi32 (),
7999 (__mmask8) -1);
8002 extern __inline __m512i
8003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8004 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
8006 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8007 (__v8di) __B,
8008 (__v8di) __W,
8009 (__mmask8) __U);
8012 extern __inline __m512i
8013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8014 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
8016 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
8017 (__v8di) __B,
8018 (__v8di)
8019 _mm512_setzero_si512 (),
8020 (__mmask8) __U);
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar single-precision to 64-bit integer conversions with an
   explicit rounding mode __R (the _cvtt_ forms truncate).  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}
#else
/* Without __OPTIMIZE__ the rounding-mode argument cannot be proven
   constant, so provide macro forms instead.  */
#define _mm_cvt_roundss_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))

#define _mm_cvt_roundss_si64(A, B)   \
    ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvt_roundss_i64(A, B)   \
    ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvtt_roundss_u64(A, B)  \
    ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))

#define _mm_cvtt_roundss_i64(A, B)  \
    ((long long)__builtin_ia32_vcvttss2si64(A, B))

#define _mm_cvtt_roundss_si64(A, B) \
    ((long long)__builtin_ia32_vcvttss2si64(A, B))
#endif
#endif
#ifdef __OPTIMIZE__
/* Scalar single-precision to 32-bit integer conversions with an
   explicit rounding mode __R (the _cvtt_ forms truncate).  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
}
#else
#define _mm_cvt_roundss_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))

#define _mm_cvt_roundss_si32(A, B)  \
    ((int)__builtin_ia32_vcvtss2si32(A, B))

#define _mm_cvt_roundss_i32(A, B)   \
    ((int)__builtin_ia32_vcvtss2si32(A, B))

#define _mm_cvtt_roundss_u32(A, B)  \
    ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))

#define _mm_cvtt_roundss_si32(A, B) \
    ((int)__builtin_ia32_vcvttss2si32(A, B))

#define _mm_cvtt_roundss_i32(A, B)  \
    ((int)__builtin_ia32_vcvttss2si32(A, B))
#endif
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar double-precision to 64-bit integer conversions with an
   explicit rounding mode __R (the _cvtt_ forms truncate).  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
}
#else
#define _mm_cvt_roundsd_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))

#define _mm_cvt_roundsd_si64(A, B)  \
    ((long long)__builtin_ia32_vcvtsd2si64(A, B))

#define _mm_cvt_roundsd_i64(A, B)   \
    ((long long)__builtin_ia32_vcvtsd2si64(A, B))

#define _mm_cvtt_roundsd_u64(A, B)  \
    ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
    ((long long)__builtin_ia32_vcvttsd2si64(A, B))

#define _mm_cvtt_roundsd_i64(A, B)  \
    ((long long)__builtin_ia32_vcvttsd2si64(A, B))
#endif
#endif
#ifdef __OPTIMIZE__
/* Scalar double-precision to 32-bit integer conversions with an
   explicit rounding mode __R (the _cvtt_ forms truncate).  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
}
#else
#define _mm_cvt_roundsd_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))

#define _mm_cvt_roundsd_si32(A, B)  \
    ((int)__builtin_ia32_vcvtsd2si32(A, B))

#define _mm_cvt_roundsd_i32(A, B)   \
    ((int)__builtin_ia32_vcvtsd2si32(A, B))

#define _mm_cvtt_roundsd_u32(A, B)  \
    ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))

#define _mm_cvtt_roundsd_si32(A, B) \
    ((int)__builtin_ia32_vcvttsd2si32(A, B))

#define _mm_cvtt_roundsd_i32(A, B)  \
    ((int)__builtin_ia32_vcvttsd2si32(A, B))
#endif
8275 extern __inline __m512d
8276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8277 _mm512_movedup_pd (__m512d __A)
8279 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8280 (__v8df)
8281 _mm512_undefined_pd (),
8282 (__mmask8) -1);
8285 extern __inline __m512d
8286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8287 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8289 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8290 (__v8df) __W,
8291 (__mmask8) __U);
8294 extern __inline __m512d
8295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8296 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8298 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8299 (__v8df)
8300 _mm512_setzero_pd (),
8301 (__mmask8) __U);
8304 extern __inline __m512d
8305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8306 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
8308 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8309 (__v8df) __B,
8310 (__v8df)
8311 _mm512_undefined_pd (),
8312 (__mmask8) -1);
8315 extern __inline __m512d
8316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8317 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8319 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8320 (__v8df) __B,
8321 (__v8df) __W,
8322 (__mmask8) __U);
8325 extern __inline __m512d
8326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8327 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8329 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8330 (__v8df) __B,
8331 (__v8df)
8332 _mm512_setzero_pd (),
8333 (__mmask8) __U);
8336 extern __inline __m512d
8337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8338 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
8340 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8341 (__v8df) __B,
8342 (__v8df)
8343 _mm512_undefined_pd (),
8344 (__mmask8) -1);
8347 extern __inline __m512d
8348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8349 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8351 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8352 (__v8df) __B,
8353 (__v8df) __W,
8354 (__mmask8) __U);
8357 extern __inline __m512d
8358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8359 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8361 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8362 (__v8df) __B,
8363 (__v8df)
8364 _mm512_setzero_pd (),
8365 (__mmask8) __U);
8368 extern __inline __m512
8369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8370 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
8372 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8373 (__v16sf) __B,
8374 (__v16sf)
8375 _mm512_undefined_ps (),
8376 (__mmask16) -1);
8379 extern __inline __m512
8380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8381 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8383 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8384 (__v16sf) __B,
8385 (__v16sf) __W,
8386 (__mmask16) __U);
8389 extern __inline __m512
8390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8391 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8393 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8394 (__v16sf) __B,
8395 (__v16sf)
8396 _mm512_setzero_ps (),
8397 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Widening ps->pd, half->float and float->half conversions.  The
   _round_ forms take an explicit rounding mode; the ps->ph forms take
   an immediate __I encoding the rounding control.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_pd (__m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
			    const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_ps (__m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
			    const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_undefined_si256 (),
						     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_undefined_si256 (),
						     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
			    const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi) __U,
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi) __U,
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_setzero_si256 (),
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_setzero_si256 (),
						     (__mmask16) __W);
}
#else
#define _mm512_cvt_roundps_pd(A, B)		 \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)   \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)

#define _mm512_maskz_cvt_roundps_pd(U, A, B)     \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)

#define _mm512_cvt_roundph_ps(A, B)		 \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)   \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)

#define _mm512_maskz_cvt_roundph_ps(U, A, B)     \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundps_ph(A, I)						 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I)						 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I)				 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I)					 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I)					 \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#endif
#ifdef __OPTIMIZE__
/* Narrowing pd->ps and scalar sd<->ss conversions with an explicit
   rounding mode __R.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_undefined_ps (),
						   (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
			    const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
						 (__v2df) __B,
						 __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
						  (__v4sf) __B,
						  __R);
}
#else
#define _mm512_cvt_roundpd_ps(A, B)		 \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)   \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)     \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)

#define _mm_cvt_roundsd_ss(A, B, C)		 \
    (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)

#define _mm_cvt_roundss_sd(A, B, C)		 \
    (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
#endif
8629 extern __inline void
8630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8633 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8636 extern __inline void
8637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638 _mm512_stream_ps (float *__P, __m512 __A)
8640 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8643 extern __inline void
8644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8645 _mm512_stream_pd (double *__P, __m512d __A)
8647 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8650 extern __inline __m512i
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_stream_load_si512 (void *__P)
8654 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
/* Constants for mantissa extraction */
typedef enum
{
  _MM_MANT_NORM_1_2,		/* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,		/* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,		/* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5		/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

typedef enum
{
  _MM_MANT_SIGN_src,		/* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,		/* sign = 0             */
  _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8673 #ifdef __OPTIMIZE__
8674 extern __inline __m128
8675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8676 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8678 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8679 (__v4sf) __B,
8680 __R);
8683 extern __inline __m128
8684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8685 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8686 __m128 __B, const int __R)
8688 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8689 (__v4sf) __B,
8690 (__v4sf) __W,
8691 (__mmask8) __U, __R);
8694 extern __inline __m128
8695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8696 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8697 const int __R)
8699 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8700 (__v4sf) __B,
8701 (__v4sf)
8702 _mm_setzero_ps (),
8703 (__mmask8) __U, __R);
8706 extern __inline __m128d
8707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8708 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8710 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8711 (__v2df) __B,
8712 __R);
8715 extern __inline __m128d
8716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8717 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8718 __m128d __B, const int __R)
8720 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8721 (__v2df) __B,
8722 (__v2df) __W,
8723 (__mmask8) __U, __R);
8726 extern __inline __m128d
8727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8728 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8729 const int __R)
8731 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8732 (__v2df) __B,
8733 (__v2df)
8734 _mm_setzero_pd (),
8735 (__mmask8) __U, __R);
8738 extern __inline __m512
8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740 _mm512_getexp_round_ps (__m512 __A, const int __R)
8742 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8743 (__v16sf)
8744 _mm512_undefined_ps (),
8745 (__mmask16) -1, __R);
8748 extern __inline __m512
8749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8750 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8751 const int __R)
8753 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8754 (__v16sf) __W,
8755 (__mmask16) __U, __R);
8758 extern __inline __m512
8759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8760 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8762 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8763 (__v16sf)
8764 _mm512_setzero_ps (),
8765 (__mmask16) __U, __R);
8768 extern __inline __m512d
8769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8770 _mm512_getexp_round_pd (__m512d __A, const int __R)
8772 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8773 (__v8df)
8774 _mm512_undefined_pd (),
8775 (__mmask8) -1, __R);
8778 extern __inline __m512d
8779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8781 const int __R)
8783 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8784 (__v8df) __W,
8785 (__mmask8) __U, __R);
8788 extern __inline __m512d
8789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8790 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8792 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8793 (__v8df)
8794 _mm512_setzero_pd (),
8795 (__mmask8) __U, __R);
8798 extern __inline __m512d
8799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8801 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8803 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8804 (__C << 2) | __B,
8805 _mm512_undefined_pd (),
8806 (__mmask8) -1, __R);
8809 extern __inline __m512d
8810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8812 _MM_MANTISSA_NORM_ENUM __B,
8813 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8815 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8816 (__C << 2) | __B,
8817 (__v8df) __W, __U,
8818 __R);
8821 extern __inline __m512d
8822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8823 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8824 _MM_MANTISSA_NORM_ENUM __B,
8825 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8827 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8828 (__C << 2) | __B,
8829 (__v8df)
8830 _mm512_setzero_pd (),
8831 __U, __R);
8834 extern __inline __m512
8835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8836 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8837 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8839 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8840 (__C << 2) | __B,
8841 _mm512_undefined_ps (),
8842 (__mmask16) -1, __R);
8845 extern __inline __m512
8846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8848 _MM_MANTISSA_NORM_ENUM __B,
8849 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8851 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8852 (__C << 2) | __B,
8853 (__v16sf) __W, __U,
8854 __R);
8857 extern __inline __m512
8858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8859 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8860 _MM_MANTISSA_NORM_ENUM __B,
8861 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8863 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8864 (__C << 2) | __B,
8865 (__v16sf)
8866 _mm512_setzero_ps (),
8867 __U, __R);
8870 extern __inline __m128d
8871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8872 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8873 _MM_MANTISSA_NORM_ENUM __C,
8874 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8876 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8877 (__v2df) __B,
8878 (__D << 2) | __C,
8879 __R);
8882 extern __inline __m128d
8883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8884 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8885 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8886 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8888 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8889 (__v2df) __B,
8890 (__D << 2) | __C,
8891 (__v2df) __W,
8892 __U, __R);
8895 extern __inline __m128d
8896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8897 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8898 _MM_MANTISSA_NORM_ENUM __C,
8899 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8901 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8902 (__v2df) __B,
8903 (__D << 2) | __C,
8904 (__v2df)
8905 _mm_setzero_pd(),
8906 __U, __R);
8909 extern __inline __m128
8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8911 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8912 _MM_MANTISSA_NORM_ENUM __C,
8913 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8915 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8916 (__v4sf) __B,
8917 (__D << 2) | __C,
8918 __R);
8921 extern __inline __m128
8922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8923 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8924 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8925 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8927 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8928 (__v4sf) __B,
8929 (__D << 2) | __C,
8930 (__v4sf) __W,
8931 __U, __R);
8934 extern __inline __m128
8935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8937 _MM_MANTISSA_NORM_ENUM __C,
8938 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8940 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8941 (__v4sf) __B,
8942 (__D << 2) | __C,
8943 (__v4sf)
8944 _mm_setzero_ps(),
8945 __U, __R);
8948 #else
8949 #define _mm512_getmant_round_pd(X, B, C, R) \
8950 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8951 (int)(((C)<<2) | (B)), \
8952 (__v8df)(__m512d)_mm512_undefined_pd(), \
8953 (__mmask8)-1,\
8954 (R)))
8956 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8957 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8958 (int)(((C)<<2) | (B)), \
8959 (__v8df)(__m512d)(W), \
8960 (__mmask8)(U),\
8961 (R)))
8963 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8964 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8965 (int)(((C)<<2) | (B)), \
8966 (__v8df)(__m512d)_mm512_setzero_pd(), \
8967 (__mmask8)(U),\
8968 (R)))
8969 #define _mm512_getmant_round_ps(X, B, C, R) \
8970 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8971 (int)(((C)<<2) | (B)), \
8972 (__v16sf)(__m512)_mm512_undefined_ps(), \
8973 (__mmask16)-1,\
8974 (R)))
8976 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8977 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8978 (int)(((C)<<2) | (B)), \
8979 (__v16sf)(__m512)(W), \
8980 (__mmask16)(U),\
8981 (R)))
8983 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8984 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8985 (int)(((C)<<2) | (B)), \
8986 (__v16sf)(__m512)_mm512_setzero_ps(), \
8987 (__mmask16)(U),\
8988 (R)))
8989 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8990 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8991 (__v2df)(__m128d)(Y), \
8992 (int)(((D)<<2) | (C)), \
8993 (R)))
8995 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
8996 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8997 (__v2df)(__m128d)(Y), \
8998 (int)(((D)<<2) | (C)), \
8999 (__v2df)(__m128d)(W), \
9000 (__mmask8)(U),\
9001 (R)))
9003 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
9004 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
9005 (__v2df)(__m128d)(Y), \
9006 (int)(((D)<<2) | (C)), \
9007 (__v2df)(__m128d)_mm_setzero_pd(), \
9008 (__mmask8)(U),\
9009 (R)))
9011 #define _mm_getmant_round_ss(X, Y, C, D, R) \
9012 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
9013 (__v4sf)(__m128)(Y), \
9014 (int)(((D)<<2) | (C)), \
9015 (R)))
9017 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
9018 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
9019 (__v4sf)(__m128)(Y), \
9020 (int)(((D)<<2) | (C)), \
9021 (__v4sf)(__m128)(W), \
9022 (__mmask8)(U),\
9023 (R)))
9025 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
9026 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
9027 (__v4sf)(__m128)(Y), \
9028 (int)(((D)<<2) | (C)), \
9029 (__v4sf)(__m128)_mm_setzero_ps(), \
9030 (__mmask8)(U),\
9031 (R)))
9033 #define _mm_getexp_round_ss(A, B, R) \
9034 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
9036 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
9037 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
9039 #define _mm_maskz_getexp_round_ss(U, A, B, C) \
9040 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
9042 #define _mm_getexp_round_sd(A, B, R) \
9043 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
9045 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
9046 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
9048 #define _mm_maskz_getexp_round_sd(U, A, B, C) \
9049 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
9052 #define _mm512_getexp_round_ps(A, R) \
9053 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
9054 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
9056 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
9057 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
9058 (__v16sf)(__m512)(W), (__mmask16)(U), R))
9060 #define _mm512_maskz_getexp_round_ps(U, A, R) \
9061 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
9062 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
9064 #define _mm512_getexp_round_pd(A, R) \
9065 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
9066 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
9068 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
9069 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
9070 (__v8df)(__m512d)(W), (__mmask8)(U), R))
9072 #define _mm512_maskz_getexp_round_pd(U, A, R) \
9073 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
9074 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
9075 #endif
9077 #ifdef __OPTIMIZE__
9078 extern __inline __m512
9079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
9082 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
9083 (__v16sf)
9084 _mm512_undefined_ps (),
9085 -1, __R);
9088 extern __inline __m512
9089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9090 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
9091 const int __imm, const int __R)
9093 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
9094 (__v16sf) __A,
9095 (__mmask16) __B, __R);
9098 extern __inline __m512
9099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9100 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
9101 const int __imm, const int __R)
9103 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
9104 __imm,
9105 (__v16sf)
9106 _mm512_setzero_ps (),
9107 (__mmask16) __A, __R);
9110 extern __inline __m512d
9111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9112 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
9114 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
9115 (__v8df)
9116 _mm512_undefined_pd (),
9117 -1, __R);
9120 extern __inline __m512d
9121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9122 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
9123 __m512d __C, const int __imm, const int __R)
9125 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
9126 (__v8df) __A,
9127 (__mmask8) __B, __R);
9130 extern __inline __m512d
9131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9132 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
9133 const int __imm, const int __R)
9135 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
9136 __imm,
9137 (__v8df)
9138 _mm512_setzero_pd (),
9139 (__mmask8) __A, __R);
9142 extern __inline __m128
9143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9144 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
9146 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
9147 (__v4sf) __B, __imm, __R);
9150 extern __inline __m128d
9151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9152 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9153 const int __R)
9155 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
9156 (__v2df) __B, __imm, __R);
9159 #else
9160 #define _mm512_roundscale_round_ps(A, B, R) \
9161 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
9162 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
9163 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
9164 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
9165 (int)(D), \
9166 (__v16sf)(__m512)(A), \
9167 (__mmask16)(B), R))
9168 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
9169 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
9170 (int)(C), \
9171 (__v16sf)_mm512_setzero_ps(),\
9172 (__mmask16)(A), R))
9173 #define _mm512_roundscale_round_pd(A, B, R) \
9174 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
9175 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
9176 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
9177 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
9178 (int)(D), \
9179 (__v8df)(__m512d)(A), \
9180 (__mmask8)(B), R))
9181 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
9182 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
9183 (int)(C), \
9184 (__v8df)_mm512_setzero_pd(),\
9185 (__mmask8)(A), R))
9186 #define _mm_roundscale_round_ss(A, B, C, R) \
9187 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
9188 (__v4sf)(__m128)(B), (int)(C), R))
9189 #define _mm_roundscale_round_sd(A, B, C, R) \
9190 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
9191 (__v2df)(__m128d)(B), (int)(C), R))
9192 #endif
9194 extern __inline __m512
9195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9196 _mm512_floor_ps (__m512 __A)
9198 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9199 _MM_FROUND_FLOOR,
9200 (__v16sf) __A, -1,
9201 _MM_FROUND_CUR_DIRECTION);
9204 extern __inline __m512d
9205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206 _mm512_floor_pd (__m512d __A)
9208 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9209 _MM_FROUND_FLOOR,
9210 (__v8df) __A, -1,
9211 _MM_FROUND_CUR_DIRECTION);
9214 extern __inline __m512
9215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9216 _mm512_ceil_ps (__m512 __A)
9218 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9219 _MM_FROUND_CEIL,
9220 (__v16sf) __A, -1,
9221 _MM_FROUND_CUR_DIRECTION);
9224 extern __inline __m512d
9225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9226 _mm512_ceil_pd (__m512d __A)
9228 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9229 _MM_FROUND_CEIL,
9230 (__v8df) __A, -1,
9231 _MM_FROUND_CUR_DIRECTION);
9234 extern __inline __m512
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9238 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9239 _MM_FROUND_FLOOR,
9240 (__v16sf) __W, __U,
9241 _MM_FROUND_CUR_DIRECTION);
9244 extern __inline __m512d
9245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9246 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9248 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9249 _MM_FROUND_FLOOR,
9250 (__v8df) __W, __U,
9251 _MM_FROUND_CUR_DIRECTION);
9254 extern __inline __m512
9255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9258 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9259 _MM_FROUND_CEIL,
9260 (__v16sf) __W, __U,
9261 _MM_FROUND_CUR_DIRECTION);
9264 extern __inline __m512d
9265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9266 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9268 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9269 _MM_FROUND_CEIL,
9270 (__v8df) __W, __U,
9271 _MM_FROUND_CUR_DIRECTION);
9274 #ifdef __OPTIMIZE__
9275 extern __inline __m512i
9276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9277 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9279 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9280 (__v16si) __B, __imm,
9281 (__v16si)
9282 _mm512_undefined_epi32 (),
9283 (__mmask16) -1);
9286 extern __inline __m512i
9287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9288 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9289 __m512i __B, const int __imm)
9291 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9292 (__v16si) __B, __imm,
9293 (__v16si) __W,
9294 (__mmask16) __U);
9297 extern __inline __m512i
9298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9299 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9300 const int __imm)
9302 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9303 (__v16si) __B, __imm,
9304 (__v16si)
9305 _mm512_setzero_si512 (),
9306 (__mmask16) __U);
9309 extern __inline __m512i
9310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9311 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9313 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9314 (__v8di) __B, __imm,
9315 (__v8di)
9316 _mm512_undefined_epi32 (),
9317 (__mmask8) -1);
9320 extern __inline __m512i
9321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9322 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9323 __m512i __B, const int __imm)
9325 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9326 (__v8di) __B, __imm,
9327 (__v8di) __W,
9328 (__mmask8) __U);
9331 extern __inline __m512i
9332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9333 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9334 const int __imm)
9336 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9337 (__v8di) __B, __imm,
9338 (__v8di)
9339 _mm512_setzero_si512 (),
9340 (__mmask8) __U);
9342 #else
9343 #define _mm512_alignr_epi32(X, Y, C) \
9344 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9345 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
9346 (__mmask16)-1))
9348 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
9349 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9350 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
9351 (__mmask16)(U)))
9353 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
9354 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9355 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
9356 (__mmask16)(U)))
9358 #define _mm512_alignr_epi64(X, Y, C) \
9359 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9360 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
9361 (__mmask8)-1))
9363 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
9364 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9365 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
9367 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
9368 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9369 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
9370 (__mmask8)(U)))
9371 #endif
9373 extern __inline __mmask16
9374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9375 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9377 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9378 (__v16si) __B,
9379 (__mmask16) -1);
9382 extern __inline __mmask16
9383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9384 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9386 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9387 (__v16si) __B, __U);
9390 extern __inline __mmask8
9391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9392 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9394 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9395 (__v8di) __B, __U);
9398 extern __inline __mmask8
9399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9402 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9403 (__v8di) __B,
9404 (__mmask8) -1);
9407 extern __inline __mmask16
9408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9411 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9412 (__v16si) __B,
9413 (__mmask16) -1);
9416 extern __inline __mmask16
9417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9420 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9421 (__v16si) __B, __U);
9424 extern __inline __mmask8
9425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9426 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9428 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9429 (__v8di) __B, __U);
9432 extern __inline __mmask8
9433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9434 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9436 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9437 (__v8di) __B,
9438 (__mmask8) -1);
9441 extern __inline __mmask16
9442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9443 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9445 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9446 (__v16si) __Y, 5,
9447 (__mmask16) -1);
9450 extern __inline __mmask16
9451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9452 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9454 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9455 (__v16si) __Y, 5,
9456 (__mmask16) __M);
9459 extern __inline __mmask16
9460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9461 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9463 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9464 (__v16si) __Y, 5,
9465 (__mmask16) __M);
9468 extern __inline __mmask16
9469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9470 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9472 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9473 (__v16si) __Y, 5,
9474 (__mmask16) -1);
9477 extern __inline __mmask8
9478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9479 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9481 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9482 (__v8di) __Y, 5,
9483 (__mmask8) __M);
9486 extern __inline __mmask8
9487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9488 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9490 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9491 (__v8di) __Y, 5,
9492 (__mmask8) -1);
9495 extern __inline __mmask8
9496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9497 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9499 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9500 (__v8di) __Y, 5,
9501 (__mmask8) __M);
9504 extern __inline __mmask8
9505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9506 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9508 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9509 (__v8di) __Y, 5,
9510 (__mmask8) -1);
9513 extern __inline __mmask16
9514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9515 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9517 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9518 (__v16si) __Y, 2,
9519 (__mmask16) __M);
9522 extern __inline __mmask16
9523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9526 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9527 (__v16si) __Y, 2,
9528 (__mmask16) -1);
9531 extern __inline __mmask16
9532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9533 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9535 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9536 (__v16si) __Y, 2,
9537 (__mmask16) __M);
9540 extern __inline __mmask16
9541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9542 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9544 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9545 (__v16si) __Y, 2,
9546 (__mmask16) -1);
9549 extern __inline __mmask8
9550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9551 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9553 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9554 (__v8di) __Y, 2,
9555 (__mmask8) __M);
9558 extern __inline __mmask8
9559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9560 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9562 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9563 (__v8di) __Y, 2,
9564 (__mmask8) -1);
9567 extern __inline __mmask8
9568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9569 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9571 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9572 (__v8di) __Y, 2,
9573 (__mmask8) __M);
9576 extern __inline __mmask8
9577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9578 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9580 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9581 (__v8di) __Y, 2,
9582 (__mmask8) -1);
9585 extern __inline __mmask16
9586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9587 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9589 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9590 (__v16si) __Y, 1,
9591 (__mmask16) __M);
9594 extern __inline __mmask16
9595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9596 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9598 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9599 (__v16si) __Y, 1,
9600 (__mmask16) -1);
9603 extern __inline __mmask16
9604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9605 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9607 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9608 (__v16si) __Y, 1,
9609 (__mmask16) __M);
9612 extern __inline __mmask16
9613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9614 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9616 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9617 (__v16si) __Y, 1,
9618 (__mmask16) -1);
9621 extern __inline __mmask8
9622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9623 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9625 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9626 (__v8di) __Y, 1,
9627 (__mmask8) __M);
9630 extern __inline __mmask8
9631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9632 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9634 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9635 (__v8di) __Y, 1,
9636 (__mmask8) -1);
9639 extern __inline __mmask8
9640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9641 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9643 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9644 (__v8di) __Y, 1,
9645 (__mmask8) __M);
9648 extern __inline __mmask8
9649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9650 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9652 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9653 (__v8di) __Y, 1,
9654 (__mmask8) -1);
9657 extern __inline __mmask16
9658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9659 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9661 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9662 (__v16si) __Y, 4,
9663 (__mmask16) -1);
9666 extern __inline __mmask16
9667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9668 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9670 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9671 (__v16si) __Y, 4,
9672 (__mmask16) __M);
9675 extern __inline __mmask16
9676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9677 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9679 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9680 (__v16si) __Y, 4,
9681 (__mmask16) __M);
9684 extern __inline __mmask16
9685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9686 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9688 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9689 (__v16si) __Y, 4,
9690 (__mmask16) -1);
9693 extern __inline __mmask8
9694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9695 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9697 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9698 (__v8di) __Y, 4,
9699 (__mmask8) __M);
9702 extern __inline __mmask8
9703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9704 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9706 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9707 (__v8di) __Y, 4,
9708 (__mmask8) -1);
9711 extern __inline __mmask8
9712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9713 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9715 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9716 (__v8di) __Y, 4,
9717 (__mmask8) __M);
9720 extern __inline __mmask8
9721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9722 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9724 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9725 (__v8di) __Y, 4,
9726 (__mmask8) -1);
/* Immediate predicate values for the _mm512_cmp_ep*_mask family.  */
#define _MM_CMPINT_EQ	    0x0
#define _MM_CMPINT_LT	    0x1
#define _MM_CMPINT_LE	    0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE	    0x4
#define _MM_CMPINT_NLT	    0x5
#define _MM_CMPINT_GE	    0x5	/* Same encoding as NLT.  */
#define _MM_CMPINT_NLE	    0x6
#define _MM_CMPINT_GT	    0x6	/* Same encoding as NLE.  */
9739 #ifdef __OPTIMIZE__
9740 extern __inline __mmask16
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9744 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9745 (__mmask8) __B);
9748 extern __inline __mmask16
9749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9752 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9753 (__mmask8) __B);
9756 extern __inline __mmask8
9757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9758 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9760 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9761 (__v8di) __Y, __P,
9762 (__mmask8) -1);
9765 extern __inline __mmask16
9766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9767 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9769 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9770 (__v16si) __Y, __P,
9771 (__mmask16) -1);
9774 extern __inline __mmask8
9775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9776 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9778 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9779 (__v8di) __Y, __P,
9780 (__mmask8) -1);
9783 extern __inline __mmask16
9784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9785 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9787 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9788 (__v16si) __Y, __P,
9789 (__mmask16) -1);
9792 extern __inline __mmask8
9793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9794 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9795 const int __R)
9797 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9798 (__v8df) __Y, __P,
9799 (__mmask8) -1, __R);
9802 extern __inline __mmask16
9803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9804 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9806 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9807 (__v16sf) __Y, __P,
9808 (__mmask16) -1, __R);
9811 extern __inline __mmask8
9812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9813 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9814 const int __P)
9816 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9817 (__v8di) __Y, __P,
9818 (__mmask8) __U);
9821 extern __inline __mmask16
9822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9823 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9824 const int __P)
9826 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9827 (__v16si) __Y, __P,
9828 (__mmask16) __U);
9831 extern __inline __mmask8
9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9834 const int __P)
9836 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9837 (__v8di) __Y, __P,
9838 (__mmask8) __U);
9841 extern __inline __mmask16
9842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9843 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9844 const int __P)
9846 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9847 (__v16si) __Y, __P,
9848 (__mmask16) __U);
9851 extern __inline __mmask8
9852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9853 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9854 const int __P, const int __R)
9856 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9857 (__v8df) __Y, __P,
9858 (__mmask8) __U, __R);
9861 extern __inline __mmask16
9862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9863 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9864 const int __P, const int __R)
9866 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9867 (__v16sf) __Y, __P,
9868 (__mmask16) __U, __R);
9871 extern __inline __mmask8
9872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9873 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9875 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9876 (__v2df) __Y, __P,
9877 (__mmask8) -1, __R);
9880 extern __inline __mmask8
9881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9882 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9883 const int __P, const int __R)
9885 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9886 (__v2df) __Y, __P,
9887 (__mmask8) __M, __R);
9890 extern __inline __mmask8
9891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9894 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9895 (__v4sf) __Y, __P,
9896 (__mmask8) -1, __R);
9899 extern __inline __mmask8
9900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9901 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9902 const int __P, const int __R)
9904 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9905 (__v4sf) __Y, __P,
9906 (__mmask8) __M, __R);
9909 #else
9910 #define _kshiftli_mask16(X, Y) \
9911 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
9913 #define _kshiftri_mask16(X, Y) \
9914 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
9916 #define _mm512_cmp_epi64_mask(X, Y, P) \
9917 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9918 (__v8di)(__m512i)(Y), (int)(P),\
9919 (__mmask8)-1))
9921 #define _mm512_cmp_epi32_mask(X, Y, P) \
9922 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9923 (__v16si)(__m512i)(Y), (int)(P), \
9924 (__mmask16)-1))
9926 #define _mm512_cmp_epu64_mask(X, Y, P) \
9927 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9928 (__v8di)(__m512i)(Y), (int)(P),\
9929 (__mmask8)-1))
9931 #define _mm512_cmp_epu32_mask(X, Y, P) \
9932 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9933 (__v16si)(__m512i)(Y), (int)(P), \
9934 (__mmask16)-1))
9936 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9937 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9938 (__v8df)(__m512d)(Y), (int)(P),\
9939 (__mmask8)-1, R))
9941 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9942 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9943 (__v16sf)(__m512)(Y), (int)(P),\
9944 (__mmask16)-1, R))
9946 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9947 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9948 (__v8di)(__m512i)(Y), (int)(P),\
9949 (__mmask8)M))
9951 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9952 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9953 (__v16si)(__m512i)(Y), (int)(P), \
9954 (__mmask16)M))
9956 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9957 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9958 (__v8di)(__m512i)(Y), (int)(P),\
9959 (__mmask8)M))
9961 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9962 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9963 (__v16si)(__m512i)(Y), (int)(P), \
9964 (__mmask16)M))
9966 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9967 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9968 (__v8df)(__m512d)(Y), (int)(P),\
9969 (__mmask8)M, R))
9971 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9972 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9973 (__v16sf)(__m512)(Y), (int)(P),\
9974 (__mmask16)M, R))
9976 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9977 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9978 (__v2df)(__m128d)(Y), (int)(P),\
9979 (__mmask8)-1, R))
9981 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9982 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9983 (__v2df)(__m128d)(Y), (int)(P),\
9984 (M), R))
9986 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9987 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9988 (__v4sf)(__m128)(Y), (int)(P), \
9989 (__mmask8)-1, R))
9991 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9992 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9993 (__v4sf)(__m128)(Y), (int)(P), \
9994 (M), R))
9995 #endif
9997 #ifdef __OPTIMIZE__
9998 extern __inline __m512
9999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
10002 __m512 __v1_old = _mm512_undefined_ps ();
10003 __mmask16 __mask = 0xFFFF;
10005 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
10006 __addr,
10007 (__v16si) __index,
10008 __mask, __scale);
10011 extern __inline __m512
10012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10013 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
10014 __m512i __index, void const *__addr, int __scale)
10016 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
10017 __addr,
10018 (__v16si) __index,
10019 __mask, __scale);
10022 extern __inline __m512d
10023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10024 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
10026 __m512d __v1_old = _mm512_undefined_pd ();
10027 __mmask8 __mask = 0xFF;
10029 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
10030 __addr,
10031 (__v8si) __index, __mask,
10032 __scale);
10035 extern __inline __m512d
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
10038 __m256i __index, void const *__addr, int __scale)
10040 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
10041 __addr,
10042 (__v8si) __index,
10043 __mask, __scale);
10046 extern __inline __m256
10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
10050 __m256 __v1_old = _mm256_undefined_ps ();
10051 __mmask8 __mask = 0xFF;
10053 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
10054 __addr,
10055 (__v8di) __index, __mask,
10056 __scale);
10059 extern __inline __m256
10060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10061 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
10062 __m512i __index, void const *__addr, int __scale)
10064 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
10065 __addr,
10066 (__v8di) __index,
10067 __mask, __scale);
10070 extern __inline __m512d
10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
10074 __m512d __v1_old = _mm512_undefined_pd ();
10075 __mmask8 __mask = 0xFF;
10077 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10078 __addr,
10079 (__v8di) __index, __mask,
10080 __scale);
10083 extern __inline __m512d
10084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10085 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
10086 __m512i __index, void const *__addr, int __scale)
10088 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10089 __addr,
10090 (__v8di) __index,
10091 __mask, __scale);
10094 extern __inline __m512i
10095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10096 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
10098 __m512i __v1_old = _mm512_undefined_epi32 ();
10099 __mmask16 __mask = 0xFFFF;
10101 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10102 __addr,
10103 (__v16si) __index,
10104 __mask, __scale);
10107 extern __inline __m512i
10108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
10110 __m512i __index, void const *__addr, int __scale)
10112 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10113 __addr,
10114 (__v16si) __index,
10115 __mask, __scale);
10118 extern __inline __m512i
10119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10120 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
10122 __m512i __v1_old = _mm512_undefined_epi32 ();
10123 __mmask8 __mask = 0xFF;
10125 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10126 __addr,
10127 (__v8si) __index, __mask,
10128 __scale);
10131 extern __inline __m512i
10132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10133 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
10134 __m256i __index, void const *__addr,
10135 int __scale)
10137 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10138 __addr,
10139 (__v8si) __index,
10140 __mask, __scale);
10143 extern __inline __m256i
10144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10145 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
10147 __m256i __v1_old = _mm256_undefined_si256 ();
10148 __mmask8 __mask = 0xFF;
10150 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10151 __addr,
10152 (__v8di) __index,
10153 __mask, __scale);
10156 extern __inline __m256i
10157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10158 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10159 __m512i __index, void const *__addr, int __scale)
10161 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10162 __addr,
10163 (__v8di) __index,
10164 __mask, __scale);
10167 extern __inline __m512i
10168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10169 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
10171 __m512i __v1_old = _mm512_undefined_epi32 ();
10172 __mmask8 __mask = 0xFF;
10174 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10175 __addr,
10176 (__v8di) __index, __mask,
10177 __scale);
10180 extern __inline __m512i
10181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10182 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
10183 __m512i __index, void const *__addr,
10184 int __scale)
10186 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10187 __addr,
10188 (__v8di) __index,
10189 __mask, __scale);
10192 extern __inline void
10193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
10196 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10197 (__v16si) __index, (__v16sf) __v1, __scale);
10200 extern __inline void
10201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10202 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
10203 __m512i __index, __m512 __v1, int __scale)
10205 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10206 (__v16sf) __v1, __scale);
10209 extern __inline void
10210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10211 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
10212 int __scale)
10214 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10215 (__v8si) __index, (__v8df) __v1, __scale);
10218 extern __inline void
10219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10220 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
10221 __m256i __index, __m512d __v1, int __scale)
10223 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10224 (__v8df) __v1, __scale);
10227 extern __inline void
10228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
10231 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10232 (__v8di) __index, (__v8sf) __v1, __scale);
10235 extern __inline void
10236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10237 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
10238 __m512i __index, __m256 __v1, int __scale)
10240 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10241 (__v8sf) __v1, __scale);
10244 extern __inline void
10245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10246 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
10247 int __scale)
10249 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10250 (__v8di) __index, (__v8df) __v1, __scale);
10253 extern __inline void
10254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10255 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
10256 __m512i __index, __m512d __v1, int __scale)
10258 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10259 (__v8df) __v1, __scale);
10262 extern __inline void
10263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10264 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
10265 __m512i __v1, int __scale)
10267 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10268 (__v16si) __index, (__v16si) __v1, __scale);
10271 extern __inline void
10272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10273 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
10274 __m512i __index, __m512i __v1, int __scale)
10276 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10277 (__v16si) __v1, __scale);
10280 extern __inline void
10281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10282 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
10283 __m512i __v1, int __scale)
10285 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10286 (__v8si) __index, (__v8di) __v1, __scale);
10289 extern __inline void
10290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10291 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
10292 __m256i __index, __m512i __v1, int __scale)
10294 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10295 (__v8di) __v1, __scale);
10298 extern __inline void
10299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10300 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
10301 __m256i __v1, int __scale)
10303 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10304 (__v8di) __index, (__v8si) __v1, __scale);
10307 extern __inline void
10308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10309 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
10310 __m512i __index, __m256i __v1, int __scale)
10312 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10313 (__v8si) __v1, __scale);
10316 extern __inline void
10317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10318 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
10319 __m512i __v1, int __scale)
10321 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10322 (__v8di) __index, (__v8di) __v1, __scale);
10325 extern __inline void
10326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10327 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
10328 __m512i __index, __m512i __v1, int __scale)
10330 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10331 (__v8di) __v1, __scale);
10333 #else
/* Macro (non-__OPTIMIZE__) forms of the gathers: SCALE must be an
   integer constant expression, so it is passed through textually.
   The unmasked forms supply an all-ones mask and a don't-care source.  */

#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
					 (void const *)ADDR,		\
					 (__v16si)(__m512i)INDEX,	\
					 (__mmask16)0xFFFF, (int)SCALE)

#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v16si)(__m512i)INDEX,	\
					 (__mmask16)MASK, (int)SCALE)

#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

/* Qword-index float gather: eight elements, 256-bit result.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,		\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

/* Integer gathers.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
					  (void const *)ADDR,		\
					  (__v16si)(__m512i)INDEX,	\
					  (__mmask16)0xFFFF, (int)SCALE)

#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,	\
					  (void const *)ADDR,		\
					  (__v16si)(__m512i)INDEX,	\
					  (__mmask16)MASK, (int)SCALE)

#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			\
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
					  (void const *)ADDR,		\
					  (__v8di)(__m512i)INDEX,	\
					  (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,	\
					  (void const *)ADDR,		\
					  (__v8di)(__m512i)INDEX,	\
					  (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)
/* Macro (non-__OPTIMIZE__) forms of the scatters.
   Fix: _mm512_mask_i64scatter_ps now casts MASK to __mmask8 instead of
   __mmask16 — only eight elements are scattered with qword indices, and
   this matches both the inline version above and the mask operand type
   of __builtin_ia32_scatterdiv16sf.  */

#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF,	\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16sf)(__m512)V1, (int)SCALE)

#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK,		\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16sf)(__m512)V1, (int)SCALE)

#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8sf)(__m256)V1, (int)SCALE)

/* Was (__mmask16)MASK; the builtin takes an 8-bit mask.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)MASK,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8sf)(__m256)V1, (int)SCALE)

#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF,	\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16si)(__m512i)V1, (int)SCALE)

#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK,		\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16si)(__m512i)V1, (int)SCALE)

#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)

#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)

#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8si)(__m256i)V1, (int)SCALE)

#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8si)(__m256i)V1, (int)SCALE)

#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)

#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)
10509 #endif
10511 extern __inline __m512d
10512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10513 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10515 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10516 (__v8df) __W,
10517 (__mmask8) __U);
10520 extern __inline __m512d
10521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10522 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10524 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10525 (__v8df)
10526 _mm512_setzero_pd (),
10527 (__mmask8) __U);
10530 extern __inline void
10531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10532 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10534 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10535 (__mmask8) __U);
10538 extern __inline __m512
10539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10542 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10543 (__v16sf) __W,
10544 (__mmask16) __U);
10547 extern __inline __m512
10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10551 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10552 (__v16sf)
10553 _mm512_setzero_ps (),
10554 (__mmask16) __U);
10557 extern __inline void
10558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10559 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10561 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10562 (__mmask16) __U);
10565 extern __inline __m512i
10566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10567 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10569 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10570 (__v8di) __W,
10571 (__mmask8) __U);
10574 extern __inline __m512i
10575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10576 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10578 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10579 (__v8di)
10580 _mm512_setzero_si512 (),
10581 (__mmask8) __U);
10584 extern __inline void
10585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10586 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10588 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10589 (__mmask8) __U);
10592 extern __inline __m512i
10593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10594 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10596 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10597 (__v16si) __W,
10598 (__mmask16) __U);
10601 extern __inline __m512i
10602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10603 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10605 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10606 (__v16si)
10607 _mm512_setzero_si512 (),
10608 (__mmask16) __U);
10611 extern __inline void
10612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10613 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10615 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10616 (__mmask16) __U);
10619 extern __inline __m512d
10620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10621 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10623 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10624 (__v8df) __W,
10625 (__mmask8) __U);
10628 extern __inline __m512d
10629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10630 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10632 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10633 (__v8df)
10634 _mm512_setzero_pd (),
10635 (__mmask8) __U);
10638 extern __inline __m512d
10639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10640 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10642 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10643 (__v8df) __W,
10644 (__mmask8) __U);
10647 extern __inline __m512d
10648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10649 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10651 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10652 (__v8df)
10653 _mm512_setzero_pd (),
10654 (__mmask8) __U);
10657 extern __inline __m512
10658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10659 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10661 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10662 (__v16sf) __W,
10663 (__mmask16) __U);
10666 extern __inline __m512
10667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10670 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10671 (__v16sf)
10672 _mm512_setzero_ps (),
10673 (__mmask16) __U);
10676 extern __inline __m512
10677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10678 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10680 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10681 (__v16sf) __W,
10682 (__mmask16) __U);
10685 extern __inline __m512
10686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10687 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10689 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10690 (__v16sf)
10691 _mm512_setzero_ps (),
10692 (__mmask16) __U);
10695 extern __inline __m512i
10696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10699 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10700 (__v8di) __W,
10701 (__mmask8) __U);
10704 extern __inline __m512i
10705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10706 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10708 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10709 (__v8di)
10710 _mm512_setzero_si512 (),
10711 (__mmask8) __U);
10714 extern __inline __m512i
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10718 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10719 (__v8di) __W,
10720 (__mmask8) __U);
10723 extern __inline __m512i
10724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10725 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10727 return (__m512i)
10728 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10729 (__v8di)
10730 _mm512_setzero_si512 (),
10731 (__mmask8) __U);
10734 extern __inline __m512i
10735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10738 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10739 (__v16si) __W,
10740 (__mmask16) __U);
10743 extern __inline __m512i
10744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10745 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10747 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10748 (__v16si)
10749 _mm512_setzero_si512 (),
10750 (__mmask16) __U);
10753 extern __inline __m512i
10754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10755 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10757 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10758 (__v16si) __W,
10759 (__mmask16) __U);
10762 extern __inline __m512i
10763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10764 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10766 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10767 (__v16si)
10768 _mm512_setzero_si512
10769 (), (__mmask16) __U);
/* Mask arithmetic operations.  The _k*_mask16 names are aliases for the
   older _mm512_k* intrinsics defined below; both operate on 16-bit
   opmask registers.  */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor
10780 extern __inline unsigned char
10781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10782 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10784 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10785 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10788 extern __inline unsigned char
10789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10790 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10792 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10793 (__mmask16) __B);
10796 extern __inline unsigned char
10797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10798 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10800 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10801 (__mmask16) __B);
10804 extern __inline unsigned int
10805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806 _cvtmask16_u32 (__mmask16 __A)
10808 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10811 extern __inline __mmask16
10812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813 _cvtu32_mask16 (unsigned int __A)
10815 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10818 extern __inline __mmask16
10819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10820 _load_mask16 (__mmask16 *__A)
10822 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10825 extern __inline void
10826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10827 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10829 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10832 extern __inline __mmask16
10833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10834 _mm512_kand (__mmask16 __A, __mmask16 __B)
10836 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10839 extern __inline __mmask16
10840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10841 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10843 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10844 (__mmask16) __B);
10847 extern __inline __mmask16
10848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10849 _mm512_kor (__mmask16 __A, __mmask16 __B)
10851 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10854 extern __inline int
10855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10856 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10858 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10859 (__mmask16) __B);
10862 extern __inline int
10863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10864 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10866 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10867 (__mmask16) __B);
10870 extern __inline __mmask16
10871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10872 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10874 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10877 extern __inline __mmask16
10878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10879 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10881 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10884 extern __inline __mmask16
10885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10886 _mm512_knot (__mmask16 __A)
10888 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10891 extern __inline __mmask16
10892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10893 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10895 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10898 extern __inline __mmask16
10899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10902 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
/* Insert a 128-bit lane (four 32-bit elements) into a 512-bit vector at
   immediate position __imm, under merge (_mask) or zero (_maskz) masking.
   The immediate must be a compile-time constant, hence the inline
   functions are only usable with optimization; otherwise macro forms are
   provided so the constant reaches the builtin directly.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
			  const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D,
						   __imm,
						   (__v16sf)
						   _mm512_setzero_ps (), __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
			 __m128i __D, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si) __A,
						    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
			 __m128 __D, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D,
						   __imm,
						   (__v16sf) __A, __B);
}
#else
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
#endif
10975 extern __inline __m512i
10976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977 _mm512_max_epi64 (__m512i __A, __m512i __B)
10979 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10980 (__v8di) __B,
10981 (__v8di)
10982 _mm512_undefined_epi32 (),
10983 (__mmask8) -1);
10986 extern __inline __m512i
10987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10988 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10990 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10991 (__v8di) __B,
10992 (__v8di)
10993 _mm512_setzero_si512 (),
10994 __M);
10997 extern __inline __m512i
10998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10999 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11001 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
11002 (__v8di) __B,
11003 (__v8di) __W, __M);
11006 extern __inline __m512i
11007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11008 _mm512_min_epi64 (__m512i __A, __m512i __B)
11010 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11011 (__v8di) __B,
11012 (__v8di)
11013 _mm512_undefined_epi32 (),
11014 (__mmask8) -1);
11017 extern __inline __m512i
11018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11019 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11021 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11022 (__v8di) __B,
11023 (__v8di) __W, __M);
11026 extern __inline __m512i
11027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11028 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
11030 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
11031 (__v8di) __B,
11032 (__v8di)
11033 _mm512_setzero_si512 (),
11034 __M);
11037 extern __inline __m512i
11038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11039 _mm512_max_epu64 (__m512i __A, __m512i __B)
11041 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11042 (__v8di) __B,
11043 (__v8di)
11044 _mm512_undefined_epi32 (),
11045 (__mmask8) -1);
11048 extern __inline __m512i
11049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11050 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11052 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11053 (__v8di) __B,
11054 (__v8di)
11055 _mm512_setzero_si512 (),
11056 __M);
11059 extern __inline __m512i
11060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11061 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11063 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
11064 (__v8di) __B,
11065 (__v8di) __W, __M);
11068 extern __inline __m512i
11069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11070 _mm512_min_epu64 (__m512i __A, __m512i __B)
11072 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11073 (__v8di) __B,
11074 (__v8di)
11075 _mm512_undefined_epi32 (),
11076 (__mmask8) -1);
11079 extern __inline __m512i
11080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11081 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11083 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11084 (__v8di) __B,
11085 (__v8di) __W, __M);
11088 extern __inline __m512i
11089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11090 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11092 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11093 (__v8di) __B,
11094 (__v8di)
11095 _mm512_setzero_si512 (),
11096 __M);
11099 extern __inline __m512i
11100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11101 _mm512_max_epi32 (__m512i __A, __m512i __B)
11103 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11104 (__v16si) __B,
11105 (__v16si)
11106 _mm512_undefined_epi32 (),
11107 (__mmask16) -1);
11110 extern __inline __m512i
11111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11112 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11114 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11115 (__v16si) __B,
11116 (__v16si)
11117 _mm512_setzero_si512 (),
11118 __M);
11121 extern __inline __m512i
11122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11123 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11125 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11126 (__v16si) __B,
11127 (__v16si) __W, __M);
11130 extern __inline __m512i
11131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11132 _mm512_min_epi32 (__m512i __A, __m512i __B)
11134 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11135 (__v16si) __B,
11136 (__v16si)
11137 _mm512_undefined_epi32 (),
11138 (__mmask16) -1);
11141 extern __inline __m512i
11142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11143 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11145 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11146 (__v16si) __B,
11147 (__v16si)
11148 _mm512_setzero_si512 (),
11149 __M);
11152 extern __inline __m512i
11153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11154 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11156 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11157 (__v16si) __B,
11158 (__v16si) __W, __M);
11161 extern __inline __m512i
11162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11163 _mm512_max_epu32 (__m512i __A, __m512i __B)
11165 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11166 (__v16si) __B,
11167 (__v16si)
11168 _mm512_undefined_epi32 (),
11169 (__mmask16) -1);
11172 extern __inline __m512i
11173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11174 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11176 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11177 (__v16si) __B,
11178 (__v16si)
11179 _mm512_setzero_si512 (),
11180 __M);
11183 extern __inline __m512i
11184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11185 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11187 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11188 (__v16si) __B,
11189 (__v16si) __W, __M);
11192 extern __inline __m512i
11193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11194 _mm512_min_epu32 (__m512i __A, __m512i __B)
11196 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11197 (__v16si) __B,
11198 (__v16si)
11199 _mm512_undefined_epi32 (),
11200 (__mmask16) -1);
11203 extern __inline __m512i
11204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11205 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11207 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11208 (__v16si) __B,
11209 (__v16si)
11210 _mm512_setzero_si512 (),
11211 __M);
11214 extern __inline __m512i
11215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11216 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11218 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11219 (__v16si) __B,
11220 (__v16si) __W, __M);
11223 extern __inline __m512
11224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11225 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
11227 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11228 (__v16sf) __B,
11229 (__v16sf)
11230 _mm512_undefined_ps (),
11231 (__mmask16) -1);
11234 extern __inline __m512
11235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11236 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11238 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11239 (__v16sf) __B,
11240 (__v16sf) __W,
11241 (__mmask16) __U);
11244 extern __inline __m512
11245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11248 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11249 (__v16sf) __B,
11250 (__v16sf)
11251 _mm512_setzero_ps (),
11252 (__mmask16) __U);
/* Scalar double/single max and min with an explicit rounding-mode
   immediate __R.  Upper vector elements are copied from __A.  __R must be
   a compile-time constant, so macro fallbacks are provided when not
   optimizing.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}
#else
#define _mm_max_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_maxsd_round(A, B, C)

#define _mm_mask_max_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)

#define _mm_maskz_max_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_max_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_maxss_round(A, B, C)

#define _mm_mask_max_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)

#define _mm_maskz_max_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_min_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_minsd_round(A, B, C)

#define _mm_mask_min_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)

#define _mm_maskz_min_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_min_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_minss_round(A, B, C)

#define _mm_mask_min_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)

#define _mm_maskz_min_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
11423 extern __inline __m512d
11424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11425 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11427 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11428 (__v8df) __W,
11429 (__mmask8) __U);
11432 extern __inline __m512
11433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11434 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11436 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11437 (__v16sf) __W,
11438 (__mmask16) __U);
11441 extern __inline __m512i
11442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11443 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11445 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11446 (__v8di) __W,
11447 (__mmask8) __U);
11450 extern __inline __m512i
11451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11454 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11455 (__v16si) __W,
11456 (__mmask16) __U);
/* Scalar fused multiply-add family with an explicit rounding immediate.
   All four sign combinations are expressed through one builtin by
   negating the appropriate operand(s):
     fmadd  =  W*A + B,  fmsub  =  W*A - B,
     fnmadd = -W*A + B,  fnmsub = -W*A - B.
   Macro fallbacks are used when __R cannot be proven constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   (__v2df) __A,
						   (__v2df) __B,
						   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  (__v4sf) __A,
						  (__v4sf) __B,
						  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   (__v2df) __A,
						   -(__v2df) __B,
						   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  (__v4sf) __A,
						  -(__v4sf) __B,
						  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   -(__v2df) __A,
						   (__v2df) __B,
						   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  -(__v4sf) __A,
						  (__v4sf) __B,
						  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   -(__v2df) __A,
						   -(__v2df) __B,
						   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  -(__v4sf) __A,
						  -(__v4sf) __B,
						  __R);
}
#else
#define _mm_fmadd_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)

#define _mm_fmadd_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)

#define _mm_fmsub_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)

#define _mm_fmsub_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
#endif
11565 extern __inline __m128d
11566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11569 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11570 (__v2df) __A,
11571 (__v2df) __B,
11572 (__mmask8) __U,
11573 _MM_FROUND_CUR_DIRECTION);
11576 extern __inline __m128
11577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11578 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11580 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11581 (__v4sf) __A,
11582 (__v4sf) __B,
11583 (__mmask8) __U,
11584 _MM_FROUND_CUR_DIRECTION);
11587 extern __inline __m128d
11588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589 _mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11591 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11592 (__v2df) __A,
11593 (__v2df) __B,
11594 (__mmask8) __U,
11595 _MM_FROUND_CUR_DIRECTION);
11598 extern __inline __m128
11599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11600 _mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11602 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11603 (__v4sf) __A,
11604 (__v4sf) __B,
11605 (__mmask8) __U,
11606 _MM_FROUND_CUR_DIRECTION);
11609 extern __inline __m128d
11610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11611 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11613 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11614 (__v2df) __A,
11615 (__v2df) __B,
11616 (__mmask8) __U,
11617 _MM_FROUND_CUR_DIRECTION);
11620 extern __inline __m128
11621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11624 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11625 (__v4sf) __A,
11626 (__v4sf) __B,
11627 (__mmask8) __U,
11628 _MM_FROUND_CUR_DIRECTION);
11631 extern __inline __m128d
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11635 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11636 (__v2df) __A,
11637 -(__v2df) __B,
11638 (__mmask8) __U,
11639 _MM_FROUND_CUR_DIRECTION);
11642 extern __inline __m128
11643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11644 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11646 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11647 (__v4sf) __A,
11648 -(__v4sf) __B,
11649 (__mmask8) __U,
11650 _MM_FROUND_CUR_DIRECTION);
11653 extern __inline __m128d
11654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11655 _mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11657 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
11658 (__v2df) __A,
11659 (__v2df) __B,
11660 (__mmask8) __U,
11661 _MM_FROUND_CUR_DIRECTION);
11664 extern __inline __m128
11665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11666 _mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11668 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
11669 (__v4sf) __A,
11670 (__v4sf) __B,
11671 (__mmask8) __U,
11672 _MM_FROUND_CUR_DIRECTION);
11675 extern __inline __m128d
11676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11677 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11679 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11680 (__v2df) __A,
11681 -(__v2df) __B,
11682 (__mmask8) __U,
11683 _MM_FROUND_CUR_DIRECTION);
11686 extern __inline __m128
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11690 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11691 (__v4sf) __A,
11692 -(__v4sf) __B,
11693 (__mmask8) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11697 extern __inline __m128d
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11701 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11702 -(__v2df) __A,
11703 (__v2df) __B,
11704 (__mmask8) __U,
11705 _MM_FROUND_CUR_DIRECTION);
11708 extern __inline __m128
11709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11710 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11712 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11713 -(__v4sf) __A,
11714 (__v4sf) __B,
11715 (__mmask8) __U,
11716 _MM_FROUND_CUR_DIRECTION);
11719 extern __inline __m128d
11720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11723 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11724 -(__v2df) __A,
11725 (__v2df) __B,
11726 (__mmask8) __U,
11727 _MM_FROUND_CUR_DIRECTION);
11730 extern __inline __m128
11731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11732 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11734 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11735 -(__v4sf) __A,
11736 (__v4sf) __B,
11737 (__mmask8) __U,
11738 _MM_FROUND_CUR_DIRECTION);
11741 extern __inline __m128d
11742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11743 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11745 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11746 -(__v2df) __A,
11747 (__v2df) __B,
11748 (__mmask8) __U,
11749 _MM_FROUND_CUR_DIRECTION);
11752 extern __inline __m128
11753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11754 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11756 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11757 -(__v4sf) __A,
11758 (__v4sf) __B,
11759 (__mmask8) __U,
11760 _MM_FROUND_CUR_DIRECTION);
11763 extern __inline __m128d
11764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11765 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11767 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11768 -(__v2df) __A,
11769 -(__v2df) __B,
11770 (__mmask8) __U,
11771 _MM_FROUND_CUR_DIRECTION);
11774 extern __inline __m128
11775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11778 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11779 -(__v4sf) __A,
11780 -(__v4sf) __B,
11781 (__mmask8) __U,
11782 _MM_FROUND_CUR_DIRECTION);
11785 extern __inline __m128d
11786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
11789 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
11790 -(__v2df) __A,
11791 (__v2df) __B,
11792 (__mmask8) __U,
11793 _MM_FROUND_CUR_DIRECTION);
11796 extern __inline __m128
11797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11798 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
11800 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
11801 -(__v4sf) __A,
11802 (__v4sf) __B,
11803 (__mmask8) __U,
11804 _MM_FROUND_CUR_DIRECTION);
11807 extern __inline __m128d
11808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11809 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
11811 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11812 -(__v2df) __A,
11813 -(__v2df) __B,
11814 (__mmask8) __U,
11815 _MM_FROUND_CUR_DIRECTION);
11818 extern __inline __m128
11819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11820 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
11822 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11823 -(__v4sf) __A,
11824 -(__v4sf) __B,
11825 (__mmask8) __U,
11826 _MM_FROUND_CUR_DIRECTION);
11829 #ifdef __OPTIMIZE__
11830 extern __inline __m128d
11831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11832 _mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
11833 const int __R)
11835 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
11836 (__v2df) __A,
11837 (__v2df) __B,
11838 (__mmask8) __U, __R);
11841 extern __inline __m128
11842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11843 _mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
11844 const int __R)
11846 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
11847 (__v4sf) __A,
11848 (__v4sf) __B,
11849 (__mmask8) __U, __R);
11852 extern __inline __m128d
11853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11854 _mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
11855 const int __R)
11857 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
11858 (__v2df) __A,
11859 (__v2df) __B,
11860 (__mmask8) __U, __R);
11863 extern __inline __m128
11864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11865 _mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
11866 const int __R)
11868 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
11869 (__v4sf) __A,
11870 (__v4sf) __B,
11871 (__mmask8) __U, __R);
11874 extern __inline __m128d
11875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11876 _mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
11877 const int __R)
11879 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
11880 (__v2df) __A,
11881 (__v2df) __B,
11882 (__mmask8) __U, __R);
11885 extern __inline __m128
11886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11887 _mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
11888 const int __R)
11890 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
11891 (__v4sf) __A,
11892 (__v4sf) __B,
11893 (__mmask8) __U, __R);
/* Scalar FMSUB (a * b - c) on element 0 with an explicit rounding-mode
   argument __R.  FMSUB is expressed through the FMADD builtins by
   negating the third operand; the _mask3 forms use the dedicated
   vfmsub*_mask3 builtins instead, since there the negated operand is
   also the merge destination.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
			 const int __R)
{
  /* Merge-masked: keeps __W's element where __U is clear.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
						  (__v2df) __A,
						  -(__v2df) __B,
						  (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
			 const int __R)
{
  /* Single-precision counterpart of the _sd variant above.  */
  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
						 (__v4sf) __A,
						 -(__v4sf) __B,
						 (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
			  const int __R)
{
  /* mask3 form: the subtrahend __B doubles as the merge source, so a
     dedicated fmsub builtin is used rather than negating __B.  */
  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
						   (__v2df) __A,
						   (__v2df) __B,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
			  const int __R)
{
  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
						  (__v4sf) __A,
						  (__v4sf) __B,
						  (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
			  const int __R)
{
  /* Zero-masked: element is zeroed where __U is clear.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
						   (__v2df) __A,
						   -(__v2df) __B,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
			  const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
						  (__v4sf) __A,
						  -(__v4sf) __B,
						  (__mmask8) __U, __R);
}
/* Scalar FNMADD (-(a * b) + c) on element 0 with an explicit rounding
   mode __R.  All forms are built from the FMADD builtins by negating
   the second multiplicand.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
			  const int __R)
{
  /* Merge-masked: keeps __W's element where __U is clear.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
						  -(__v2df) __A,
						  (__v2df) __B,
						  (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
			  const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
						 -(__v4sf) __A,
						 (__v4sf) __B,
						 (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
			   const int __R)
{
  /* mask3 form: merges into the addend operand.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
						   -(__v2df) __A,
						   (__v2df) __B,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
			   const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
						  -(__v4sf) __A,
						  (__v4sf) __B,
						  (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
			   const int __R)
{
  /* Zero-masked: element is zeroed where __U is clear.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
						   -(__v2df) __A,
						   (__v2df) __B,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
			   const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
						  -(__v4sf) __A,
						  (__v4sf) __B,
						  (__mmask8) __U, __R);
}
/* Scalar FNMSUB (-(a * b) - c) on element 0 with an explicit rounding
   mode __R.  Built from the FMADD builtins by negating both the second
   multiplicand and the addend; the _mask3 forms negate only the
   multiplicand and use the vfmsub*_mask3 builtins, since the subtrahend
   is also the merge destination there.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
			  const int __R)
{
  /* Merge-masked: keeps __W's element where __U is clear.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
						  -(__v2df) __A,
						  -(__v2df) __B,
						  (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
			  const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
						 -(__v4sf) __A,
						 -(__v4sf) __B,
						 (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
			   const int __R)
{
  /* mask3 form: __B is the merge source, so only __A is negated and
     the dedicated fmsub builtin supplies the subtraction.  */
  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
						   -(__v2df) __A,
						   (__v2df) __B,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
			   const int __R)
{
  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
						  -(__v4sf) __A,
						  (__v4sf) __B,
						  (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
			   const int __R)
{
  /* Zero-masked: element is zeroed where __U is clear.  */
  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
						   -(__v2df) __A,
						   -(__v2df) __B,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
			   const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
						  -(__v4sf) __A,
						  -(__v4sf) __B,
						  (__mmask8) __U, __R);
}
#else
/* Fallback when __OPTIMIZE__ is not defined (matching the #ifdef
   __OPTIMIZE__ earlier in the file): the builtins are exposed as
   macros so the rounding-mode argument reaches them directly.
   Operand negation selects the fmsub/fnmadd/fnmsub forms, mirroring
   the inline definitions above.  */
#define _mm_mask_fmadd_round_sd(A, U, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)

#define _mm_mask_fmadd_round_ss(A, U, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)

#define _mm_mask3_fmadd_round_sd(A, B, C, U, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)

#define _mm_mask3_fmadd_round_ss(A, B, C, U, R)            \
    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)

#define _mm_maskz_fmadd_round_sd(U, A, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)

/* fmsub: negate the addend.  */
#define _mm_mask_fmsub_round_sd(A, U, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)

#define _mm_mask_fmsub_round_ss(A, U, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)

#define _mm_mask3_fmsub_round_sd(A, B, C, U, R)            \
    (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)

#define _mm_mask3_fmsub_round_ss(A, B, C, U, R)            \
    (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)

#define _mm_maskz_fmsub_round_sd(U, A, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)

#define _mm_maskz_fmsub_round_ss(U, A, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)

/* fnmadd: negate the second multiplicand.  */
#define _mm_mask_fnmadd_round_sd(A, U, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)

#define _mm_mask_fnmadd_round_ss(A, U, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)

#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)

#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R)            \
    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)

#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)

#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)

/* fnmsub: negate both the second multiplicand and the addend (the
   mask3 forms negate only the multiplicand and use the fmsub
   builtins, as in the inline versions).  */
#define _mm_mask_fnmsub_round_sd(A, U, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)

#define _mm_mask_fnmsub_round_ss(A, U, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)

#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R)            \
    (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)

#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R)            \
    (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)

#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R)            \
    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)

#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R)            \
    (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
#endif
#ifdef __OPTIMIZE__
/* Scalar compare returning an int flag result, with an explicit
   predicate __P and rounding/SAE argument __R passed straight through
   to the vcomiss/vcomisd builtins.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
}
#else
/* Non-optimizing fallback: macro forms so the immediate arguments
   reach the builtins directly.  */
#define _mm_comi_round_ss(A, B, C, D)\
__builtin_ia32_vcomiss(A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D)\
__builtin_ia32_vcomisd(A, B, C, D)
#endif
/* 512-bit packed square root, double and single precision, using the
   current rounding direction.  The unmasked form passes an undefined
   source vector with an all-ones mask; the maskz form passes a zero
   vector so masked-off lanes become 0.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U,
						 _MM_FROUND_CUR_DIRECTION);
}
/* Packed and scalar addition.  The unmasked 512-bit forms use plain
   GNU vector arithmetic (no builtin needed); the masked forms go
   through the builtins to get merge/zero masking with the current
   rounding direction.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_pd (__m512d __A, __m512d __B)
{
  /* Plain vector add; masking is not needed here.  */
  return (__m512d) ((__v8df)__A + (__v8df)__B);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A + (__v16sf)__B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Masked scalar add: operates on element 0 only; upper elements are
   handled by the builtin.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}
/* Packed and scalar subtraction; structure mirrors the add family
   above: plain vector arithmetic for the unmasked 512-bit forms,
   builtins for the masked forms.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A - (__v8df)__B);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A - (__v16sf)__B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Masked scalar subtract on element 0.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}
/* Packed and scalar multiplication; same pattern as add/sub above.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A * (__v8df)__B);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A * (__v16sf)__B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Masked scalar multiply on element 0.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
		 __m128d __B)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
		 __m128 __B)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}
/* Packed and scalar division; same pattern as the other arithmetic
   families.  (The 512-bit pd forms name their operands __M / __V
   rather than __A / __B.)  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_pd (__m512d __M, __m512d __V)
{
  return (__m512d) ((__v8df)__M / (__v8df)__V);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A / (__v16sf)__B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Masked scalar divide on element 0.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
		 __m128d __B)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
		 __m128 __B)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}
/* Packed and scalar maximum.  Unlike add/sub/mul/div there is no
   vector-operator shortcut, so the unmasked 512-bit forms also go
   through the builtin with an undefined source and an all-ones
   mask.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Masked scalar maximum on element 0.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}
/* Packed and scalar minimum; mirrors the max family above.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Masked scalar minimum on element 0.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U,
						   _MM_FROUND_CUR_DIRECTION);
}
/* VSCALEF wrappers (scale __A by powers of two taken from __B; exact
   per-element semantics are those of the vscalefpd/ps/sd/ss builtins).
   Same masking pattern as the families above.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  /* Merge-masked: lanes with a clear __U bit come from __W.  */
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  /* Zero-masked: lanes with a clear __U bit become 0.  */
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar scalef on element 0; note these pass a zero source
   (not undefined) with an all-ones mask.  */

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
						       (__v2df) __B,
						       (__v2df)
						       _mm_setzero_pd (),
						       (__mmask8) -1,
						       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
						      (__v4sf) __B,
						      (__v4sf)
						      _mm_setzero_ps (),
						      (__mmask8) -1,
						      _MM_FROUND_CUR_DIRECTION);
}
13006 extern __inline __m512d
13007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13008 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13010 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13011 (__v8df) __B,
13012 (__v8df) __C,
13013 (__mmask8) -1,
13014 _MM_FROUND_CUR_DIRECTION);
13017 extern __inline __m512d
13018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13019 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13021 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
13022 (__v8df) __B,
13023 (__v8df) __C,
13024 (__mmask8) __U,
13025 _MM_FROUND_CUR_DIRECTION);
13028 extern __inline __m512d
13029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13030 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13032 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
13033 (__v8df) __B,
13034 (__v8df) __C,
13035 (__mmask8) __U,
13036 _MM_FROUND_CUR_DIRECTION);
13039 extern __inline __m512d
13040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13041 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13043 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
13044 (__v8df) __B,
13045 (__v8df) __C,
13046 (__mmask8) __U,
13047 _MM_FROUND_CUR_DIRECTION);
13050 extern __inline __m512
13051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13052 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13054 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13055 (__v16sf) __B,
13056 (__v16sf) __C,
13057 (__mmask16) -1,
13058 _MM_FROUND_CUR_DIRECTION);
13061 extern __inline __m512
13062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13063 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13065 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
13066 (__v16sf) __B,
13067 (__v16sf) __C,
13068 (__mmask16) __U,
13069 _MM_FROUND_CUR_DIRECTION);
13072 extern __inline __m512
13073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13074 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13076 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
13077 (__v16sf) __B,
13078 (__v16sf) __C,
13079 (__mmask16) __U,
13080 _MM_FROUND_CUR_DIRECTION);
13083 extern __inline __m512
13084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13085 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13087 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
13088 (__v16sf) __B,
13089 (__v16sf) __C,
13090 (__mmask16) __U,
13091 _MM_FROUND_CUR_DIRECTION);
13094 extern __inline __m512d
13095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13096 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13098 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
13099 (__v8df) __B,
13100 (__v8df) __C,
13101 (__mmask8) -1,
13102 _MM_FROUND_CUR_DIRECTION);
13105 extern __inline __m512d
13106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13107 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13109 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
13110 (__v8df) __B,
13111 (__v8df) __C,
13112 (__mmask8) __U,
13113 _MM_FROUND_CUR_DIRECTION);
13116 extern __inline __m512d
13117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13118 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13120 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
13121 (__v8df) __B,
13122 (__v8df) __C,
13123 (__mmask8) __U,
13124 _MM_FROUND_CUR_DIRECTION);
13127 extern __inline __m512d
13128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13129 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13131 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
13132 (__v8df) __B,
13133 (__v8df) __C,
13134 (__mmask8) __U,
13135 _MM_FROUND_CUR_DIRECTION);
13138 extern __inline __m512
13139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13140 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13142 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
13143 (__v16sf) __B,
13144 (__v16sf) __C,
13145 (__mmask16) -1,
13146 _MM_FROUND_CUR_DIRECTION);
13149 extern __inline __m512
13150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13151 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13153 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
13154 (__v16sf) __B,
13155 (__v16sf) __C,
13156 (__mmask16) __U,
13157 _MM_FROUND_CUR_DIRECTION);
13160 extern __inline __m512
13161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13162 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13164 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
13165 (__v16sf) __B,
13166 (__v16sf) __C,
13167 (__mmask16) __U,
13168 _MM_FROUND_CUR_DIRECTION);
13171 extern __inline __m512
13172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13173 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13175 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
13176 (__v16sf) __B,
13177 (__v16sf) __C,
13178 (__mmask16) __U,
13179 _MM_FROUND_CUR_DIRECTION);
13182 extern __inline __m512d
13183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13184 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
13186 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13187 (__v8df) __B,
13188 (__v8df) __C,
13189 (__mmask8) -1,
13190 _MM_FROUND_CUR_DIRECTION);
13193 extern __inline __m512d
13194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13195 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13197 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13198 (__v8df) __B,
13199 (__v8df) __C,
13200 (__mmask8) __U,
13201 _MM_FROUND_CUR_DIRECTION);
13204 extern __inline __m512d
13205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13206 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13208 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
13209 (__v8df) __B,
13210 (__v8df) __C,
13211 (__mmask8) __U,
13212 _MM_FROUND_CUR_DIRECTION);
13215 extern __inline __m512d
13216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13217 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13219 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13220 (__v8df) __B,
13221 (__v8df) __C,
13222 (__mmask8) __U,
13223 _MM_FROUND_CUR_DIRECTION);
13226 extern __inline __m512
13227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13228 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
13230 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13231 (__v16sf) __B,
13232 (__v16sf) __C,
13233 (__mmask16) -1,
13234 _MM_FROUND_CUR_DIRECTION);
13237 extern __inline __m512
13238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13239 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13241 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13242 (__v16sf) __B,
13243 (__v16sf) __C,
13244 (__mmask16) __U,
13245 _MM_FROUND_CUR_DIRECTION);
13248 extern __inline __m512
13249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13250 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13252 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
13253 (__v16sf) __B,
13254 (__v16sf) __C,
13255 (__mmask16) __U,
13256 _MM_FROUND_CUR_DIRECTION);
13259 extern __inline __m512
13260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13261 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13263 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13264 (__v16sf) __B,
13265 (__v16sf) __C,
13266 (__mmask16) __U,
13267 _MM_FROUND_CUR_DIRECTION);
13270 extern __inline __m512d
13271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13272 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
13274 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13275 (__v8df) __B,
13276 -(__v8df) __C,
13277 (__mmask8) -1,
13278 _MM_FROUND_CUR_DIRECTION);
13281 extern __inline __m512d
13282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13283 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13285 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
13286 (__v8df) __B,
13287 -(__v8df) __C,
13288 (__mmask8) __U,
13289 _MM_FROUND_CUR_DIRECTION);
13292 extern __inline __m512d
13293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13294 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13296 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
13297 (__v8df) __B,
13298 (__v8df) __C,
13299 (__mmask8) __U,
13300 _MM_FROUND_CUR_DIRECTION);
13303 extern __inline __m512d
13304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13305 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13307 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
13308 (__v8df) __B,
13309 -(__v8df) __C,
13310 (__mmask8) __U,
13311 _MM_FROUND_CUR_DIRECTION);
13314 extern __inline __m512
13315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13316 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
13318 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13319 (__v16sf) __B,
13320 -(__v16sf) __C,
13321 (__mmask16) -1,
13322 _MM_FROUND_CUR_DIRECTION);
13325 extern __inline __m512
13326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13327 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13329 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
13330 (__v16sf) __B,
13331 -(__v16sf) __C,
13332 (__mmask16) __U,
13333 _MM_FROUND_CUR_DIRECTION);
13336 extern __inline __m512
13337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13338 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13340 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
13341 (__v16sf) __B,
13342 (__v16sf) __C,
13343 (__mmask16) __U,
13344 _MM_FROUND_CUR_DIRECTION);
13347 extern __inline __m512
13348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13349 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13351 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
13352 (__v16sf) __B,
13353 -(__v16sf) __C,
13354 (__mmask16) __U,
13355 _MM_FROUND_CUR_DIRECTION);
13358 extern __inline __m512d
13359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13360 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
13362 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13363 (__v8df) __B,
13364 (__v8df) __C,
13365 (__mmask8) -1,
13366 _MM_FROUND_CUR_DIRECTION);
13369 extern __inline __m512d
13370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13371 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13373 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
13374 (__v8df) __B,
13375 (__v8df) __C,
13376 (__mmask8) __U,
13377 _MM_FROUND_CUR_DIRECTION);
13380 extern __inline __m512d
13381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13382 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13384 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
13385 (__v8df) __B,
13386 (__v8df) __C,
13387 (__mmask8) __U,
13388 _MM_FROUND_CUR_DIRECTION);
13391 extern __inline __m512d
13392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13393 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13395 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
13396 (__v8df) __B,
13397 (__v8df) __C,
13398 (__mmask8) __U,
13399 _MM_FROUND_CUR_DIRECTION);
13402 extern __inline __m512
13403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13404 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
13406 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13407 (__v16sf) __B,
13408 (__v16sf) __C,
13409 (__mmask16) -1,
13410 _MM_FROUND_CUR_DIRECTION);
13413 extern __inline __m512
13414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13415 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13417 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
13418 (__v16sf) __B,
13419 (__v16sf) __C,
13420 (__mmask16) __U,
13421 _MM_FROUND_CUR_DIRECTION);
13424 extern __inline __m512
13425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13426 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13428 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
13429 (__v16sf) __B,
13430 (__v16sf) __C,
13431 (__mmask16) __U,
13432 _MM_FROUND_CUR_DIRECTION);
13435 extern __inline __m512
13436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13437 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13439 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
13440 (__v16sf) __B,
13441 (__v16sf) __C,
13442 (__mmask16) __U,
13443 _MM_FROUND_CUR_DIRECTION);
13446 extern __inline __m512d
13447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13448 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
13450 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13451 (__v8df) __B,
13452 (__v8df) __C,
13453 (__mmask8) -1,
13454 _MM_FROUND_CUR_DIRECTION);
13457 extern __inline __m512d
13458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13459 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
13461 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
13462 (__v8df) __B,
13463 (__v8df) __C,
13464 (__mmask8) __U,
13465 _MM_FROUND_CUR_DIRECTION);
13468 extern __inline __m512d
13469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13470 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
13472 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
13473 (__v8df) __B,
13474 (__v8df) __C,
13475 (__mmask8) __U,
13476 _MM_FROUND_CUR_DIRECTION);
13479 extern __inline __m512d
13480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13481 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
13483 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
13484 (__v8df) __B,
13485 (__v8df) __C,
13486 (__mmask8) __U,
13487 _MM_FROUND_CUR_DIRECTION);
13490 extern __inline __m512
13491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13492 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
13494 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13495 (__v16sf) __B,
13496 (__v16sf) __C,
13497 (__mmask16) -1,
13498 _MM_FROUND_CUR_DIRECTION);
13501 extern __inline __m512
13502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13503 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
13505 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
13506 (__v16sf) __B,
13507 (__v16sf) __C,
13508 (__mmask16) __U,
13509 _MM_FROUND_CUR_DIRECTION);
13512 extern __inline __m512
13513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13514 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
13516 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
13517 (__v16sf) __B,
13518 (__v16sf) __C,
13519 (__mmask16) __U,
13520 _MM_FROUND_CUR_DIRECTION);
13523 extern __inline __m512
13524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13525 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
13527 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
13528 (__v16sf) __B,
13529 (__v16sf) __C,
13530 (__mmask16) __U,
13531 _MM_FROUND_CUR_DIRECTION);
13534 extern __inline __m256i
13535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13536 _mm512_cvttpd_epi32 (__m512d __A)
13538 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13539 (__v8si)
13540 _mm256_undefined_si256 (),
13541 (__mmask8) -1,
13542 _MM_FROUND_CUR_DIRECTION);
13545 extern __inline __m256i
13546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13547 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13549 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13550 (__v8si) __W,
13551 (__mmask8) __U,
13552 _MM_FROUND_CUR_DIRECTION);
13555 extern __inline __m256i
13556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13557 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
13559 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
13560 (__v8si)
13561 _mm256_setzero_si256 (),
13562 (__mmask8) __U,
13563 _MM_FROUND_CUR_DIRECTION);
13566 extern __inline __m256i
13567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13568 _mm512_cvttpd_epu32 (__m512d __A)
13570 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13571 (__v8si)
13572 _mm256_undefined_si256 (),
13573 (__mmask8) -1,
13574 _MM_FROUND_CUR_DIRECTION);
13577 extern __inline __m256i
13578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13579 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13581 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13582 (__v8si) __W,
13583 (__mmask8) __U,
13584 _MM_FROUND_CUR_DIRECTION);
13587 extern __inline __m256i
13588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13589 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
13591 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
13592 (__v8si)
13593 _mm256_setzero_si256 (),
13594 (__mmask8) __U,
13595 _MM_FROUND_CUR_DIRECTION);
13598 extern __inline __m256i
13599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13600 _mm512_cvtpd_epi32 (__m512d __A)
13602 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13603 (__v8si)
13604 _mm256_undefined_si256 (),
13605 (__mmask8) -1,
13606 _MM_FROUND_CUR_DIRECTION);
13609 extern __inline __m256i
13610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13611 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
13613 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13614 (__v8si) __W,
13615 (__mmask8) __U,
13616 _MM_FROUND_CUR_DIRECTION);
13619 extern __inline __m256i
13620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13621 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
13623 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
13624 (__v8si)
13625 _mm256_setzero_si256 (),
13626 (__mmask8) __U,
13627 _MM_FROUND_CUR_DIRECTION);
13630 extern __inline __m256i
13631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13632 _mm512_cvtpd_epu32 (__m512d __A)
13634 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13635 (__v8si)
13636 _mm256_undefined_si256 (),
13637 (__mmask8) -1,
13638 _MM_FROUND_CUR_DIRECTION);
13641 extern __inline __m256i
13642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13643 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
13645 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13646 (__v8si) __W,
13647 (__mmask8) __U,
13648 _MM_FROUND_CUR_DIRECTION);
13651 extern __inline __m256i
13652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13653 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
13655 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
13656 (__v8si)
13657 _mm256_setzero_si256 (),
13658 (__mmask8) __U,
13659 _MM_FROUND_CUR_DIRECTION);
13662 extern __inline __m512i
13663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13664 _mm512_cvttps_epi32 (__m512 __A)
13666 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13667 (__v16si)
13668 _mm512_undefined_epi32 (),
13669 (__mmask16) -1,
13670 _MM_FROUND_CUR_DIRECTION);
13673 extern __inline __m512i
13674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13675 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13677 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13678 (__v16si) __W,
13679 (__mmask16) __U,
13680 _MM_FROUND_CUR_DIRECTION);
13683 extern __inline __m512i
13684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13685 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
13687 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13688 (__v16si)
13689 _mm512_setzero_si512 (),
13690 (__mmask16) __U,
13691 _MM_FROUND_CUR_DIRECTION);
13694 extern __inline __m512i
13695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13696 _mm512_cvttps_epu32 (__m512 __A)
13698 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13699 (__v16si)
13700 _mm512_undefined_epi32 (),
13701 (__mmask16) -1,
13702 _MM_FROUND_CUR_DIRECTION);
13705 extern __inline __m512i
13706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13707 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13709 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13710 (__v16si) __W,
13711 (__mmask16) __U,
13712 _MM_FROUND_CUR_DIRECTION);
13715 extern __inline __m512i
13716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13717 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
13719 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13720 (__v16si)
13721 _mm512_setzero_si512 (),
13722 (__mmask16) __U,
13723 _MM_FROUND_CUR_DIRECTION);
13726 extern __inline __m512i
13727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13728 _mm512_cvtps_epi32 (__m512 __A)
13730 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13731 (__v16si)
13732 _mm512_undefined_epi32 (),
13733 (__mmask16) -1,
13734 _MM_FROUND_CUR_DIRECTION);
13737 extern __inline __m512i
13738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13739 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13741 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13742 (__v16si) __W,
13743 (__mmask16) __U,
13744 _MM_FROUND_CUR_DIRECTION);
13747 extern __inline __m512i
13748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13749 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
13751 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13752 (__v16si)
13753 _mm512_setzero_si512 (),
13754 (__mmask16) __U,
13755 _MM_FROUND_CUR_DIRECTION);
13758 extern __inline __m512i
13759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13760 _mm512_cvtps_epu32 (__m512 __A)
13762 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13763 (__v16si)
13764 _mm512_undefined_epi32 (),
13765 (__mmask16) -1,
13766 _MM_FROUND_CUR_DIRECTION);
13769 extern __inline __m512i
13770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13771 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13773 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13774 (__v16si) __W,
13775 (__mmask16) __U,
13776 _MM_FROUND_CUR_DIRECTION);
13779 extern __inline __m512i
13780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13781 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
13783 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13784 (__v16si)
13785 _mm512_setzero_si512 (),
13786 (__mmask16) __U,
13787 _MM_FROUND_CUR_DIRECTION);
13790 extern __inline double
13791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13792 _mm512_cvtsd_f64 (__m512d __A)
13794 return __A[0];
13797 extern __inline float
13798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13799 _mm512_cvtss_f32 (__m512 __A)
13801 return __A[0];
13804 #ifdef __x86_64__
13805 extern __inline __m128
13806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13807 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
13809 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
13810 _MM_FROUND_CUR_DIRECTION);
13813 extern __inline __m128d
13814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13815 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
13817 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
13818 _MM_FROUND_CUR_DIRECTION);
13820 #endif
13822 extern __inline __m128
13823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13824 _mm_cvtu32_ss (__m128 __A, unsigned __B)
13826 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
13827 _MM_FROUND_CUR_DIRECTION);
13830 extern __inline __m512
13831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13832 _mm512_cvtepi32_ps (__m512i __A)
13834 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13835 (__v16sf)
13836 _mm512_undefined_ps (),
13837 (__mmask16) -1,
13838 _MM_FROUND_CUR_DIRECTION);
13841 extern __inline __m512
13842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13843 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13845 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13846 (__v16sf) __W,
13847 (__mmask16) __U,
13848 _MM_FROUND_CUR_DIRECTION);
13851 extern __inline __m512
13852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13853 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
13855 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13856 (__v16sf)
13857 _mm512_setzero_ps (),
13858 (__mmask16) __U,
13859 _MM_FROUND_CUR_DIRECTION);
13862 extern __inline __m512
13863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13864 _mm512_cvtepu32_ps (__m512i __A)
13866 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13867 (__v16sf)
13868 _mm512_undefined_ps (),
13869 (__mmask16) -1,
13870 _MM_FROUND_CUR_DIRECTION);
13873 extern __inline __m512
13874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13875 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13877 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13878 (__v16sf) __W,
13879 (__mmask16) __U,
13880 _MM_FROUND_CUR_DIRECTION);
13883 extern __inline __m512
13884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13885 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
13887 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13888 (__v16sf)
13889 _mm512_setzero_ps (),
13890 (__mmask16) __U,
13891 _MM_FROUND_CUR_DIRECTION);
13894 #ifdef __OPTIMIZE__
13895 extern __inline __m512d
13896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13897 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
13899 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13900 (__v8df) __B,
13901 (__v8di) __C,
13902 __imm,
13903 (__mmask8) -1,
13904 _MM_FROUND_CUR_DIRECTION);
13907 extern __inline __m512d
13908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13909 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
13910 __m512i __C, const int __imm)
13912 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13913 (__v8df) __B,
13914 (__v8di) __C,
13915 __imm,
13916 (__mmask8) __U,
13917 _MM_FROUND_CUR_DIRECTION);
13920 extern __inline __m512d
13921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13922 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
13923 __m512i __C, const int __imm)
13925 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
13926 (__v8df) __B,
13927 (__v8di) __C,
13928 __imm,
13929 (__mmask8) __U,
13930 _MM_FROUND_CUR_DIRECTION);
13933 extern __inline __m512
13934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13935 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
13937 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13938 (__v16sf) __B,
13939 (__v16si) __C,
13940 __imm,
13941 (__mmask16) -1,
13942 _MM_FROUND_CUR_DIRECTION);
13945 extern __inline __m512
13946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13947 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
13948 __m512i __C, const int __imm)
13950 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13951 (__v16sf) __B,
13952 (__v16si) __C,
13953 __imm,
13954 (__mmask16) __U,
13955 _MM_FROUND_CUR_DIRECTION);
13958 extern __inline __m512
13959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13960 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
13961 __m512i __C, const int __imm)
13963 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
13964 (__v16sf) __B,
13965 (__v16si) __C,
13966 __imm,
13967 (__mmask16) __U,
13968 _MM_FROUND_CUR_DIRECTION);
/* Scalar VFIXUPIMM{SD,SS} intrinsics (__OPTIMIZE__ branch): thin
   wrappers around __builtin_ia32_fixupimms{d,s}_mask{,z} with the
   rounding mode pinned to _MM_FROUND_CUR_DIRECTION.
   NOTE(review): the leading "139xx"/"140xx" tokens on every line and
   the missing '{' / '}' body lines are artifacts of this line-numbered
   extraction, not part of the actual header text.  */
/* Unmasked form: write mask hard-wired to (__mmask8) -1.  */
13971 extern __inline __m128d
13972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13973 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
13975 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13976 (__v2df) __B,
13977 (__v2di) __C, __imm,
13978 (__mmask8) -1,
13979 _MM_FROUND_CUR_DIRECTION);
/* Merge-masked form: same builtin, caller-supplied mask __U.  */
13982 extern __inline __m128d
13983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13984 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
13985 __m128i __C, const int __imm)
13987 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13988 (__v2df) __B,
13989 (__v2di) __C, __imm,
13990 (__mmask8) __U,
13991 _MM_FROUND_CUR_DIRECTION);
/* Zero-masked form: routed to the distinct _maskz builtin.  */
13994 extern __inline __m128d
13995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13996 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
13997 __m128i __C, const int __imm)
13999 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
14000 (__v2df) __B,
14001 (__v2di) __C,
14002 __imm,
14003 (__mmask8) __U,
14004 _MM_FROUND_CUR_DIRECTION);
/* Single-precision (__v4sf/__v4si) counterparts of the three forms
   above; structure is otherwise identical.  */
14007 extern __inline __m128
14008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14009 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
14011 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
14012 (__v4sf) __B,
14013 (__v4si) __C, __imm,
14014 (__mmask8) -1,
14015 _MM_FROUND_CUR_DIRECTION);
14018 extern __inline __m128
14019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14020 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
14021 __m128i __C, const int __imm)
14023 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
14024 (__v4sf) __B,
14025 (__v4si) __C, __imm,
14026 (__mmask8) __U,
14027 _MM_FROUND_CUR_DIRECTION);
14030 extern __inline __m128
14031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14032 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
14033 __m128i __C, const int __imm)
14035 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
14036 (__v4sf) __B,
14037 (__v4si) __C, __imm,
14038 (__mmask8) __U,
14039 _MM_FROUND_CUR_DIRECTION);
/* Non-__OPTIMIZE__ branch: macro forms of the fixupimm intrinsics.
   Each argument is cast through its intrinsic type so the builtin's
   immediate-operand checking still sees a compile-time constant, and
   every macro mirrors the inline function of the same name exactly.
   NOTE(review): leading "140xx" tokens are extraction artifacts.  */
14041 #else
14042 #define _mm512_fixupimm_pd(X, Y, Z, C) \
14043 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
14044 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
14045 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
14047 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
14048 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
14049 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
14050 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14052 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
14053 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
14054 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
14055 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14057 #define _mm512_fixupimm_ps(X, Y, Z, C) \
14058 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
14059 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
14060 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
14062 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
14063 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
14064 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
14065 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
14067 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
14068 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
14069 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
14070 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
/* Scalar sd/ss variants below use __mmask8 and the 128-bit vector
   types, otherwise follow the packed macros above.  */
14072 #define _mm_fixupimm_sd(X, Y, Z, C) \
14073 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
14074 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
14075 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
14077 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
14078 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
14079 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
14080 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14082 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
14083 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
14084 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
14085 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14087 #define _mm_fixupimm_ss(X, Y, Z, C) \
14088 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
14089 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
14090 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
14092 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
14093 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
14094 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
14095 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14097 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
14098 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
14099 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
14100 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14101 #endif
/* Scalar float -> integer conversions with the current rounding mode.
   The 64-bit result forms are only available on x86-64 (guarded by
   __x86_64__); the 32-bit forms are unconditional.  "cvt" converts
   using the current rounding direction, "cvtt" truncates.
   NOTE(review): leading "141xx"/"142xx" tokens and the absent '{'/'}'
   lines are extraction artifacts.  */
14103 #ifdef __x86_64__
14104 extern __inline unsigned long long
14105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14106 _mm_cvtss_u64 (__m128 __A)
14108 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
14109 __A,
14110 _MM_FROUND_CUR_DIRECTION);
14113 extern __inline unsigned long long
14114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14115 _mm_cvttss_u64 (__m128 __A)
14117 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
14118 __A,
14119 _MM_FROUND_CUR_DIRECTION);
14122 extern __inline long long
14123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14124 _mm_cvttss_i64 (__m128 __A)
14126 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
14127 _MM_FROUND_CUR_DIRECTION);
14129 #endif /* __x86_64__ */
/* 32-bit single-precision conversions, available everywhere.  */
14131 extern __inline unsigned
14132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14133 _mm_cvtss_u32 (__m128 __A)
14135 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
14136 _MM_FROUND_CUR_DIRECTION);
14139 extern __inline unsigned
14140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14141 _mm_cvttss_u32 (__m128 __A)
14143 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
14144 _MM_FROUND_CUR_DIRECTION);
14147 extern __inline int
14148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14149 _mm_cvttss_i32 (__m128 __A)
14151 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
14152 _MM_FROUND_CUR_DIRECTION);
/* Double-precision counterparts; same x86-64 gating for 64-bit
   results.  */
14155 #ifdef __x86_64__
14156 extern __inline unsigned long long
14157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14158 _mm_cvtsd_u64 (__m128d __A)
14160 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
14161 __A,
14162 _MM_FROUND_CUR_DIRECTION);
14165 extern __inline unsigned long long
14166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14167 _mm_cvttsd_u64 (__m128d __A)
14169 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
14170 __A,
14171 _MM_FROUND_CUR_DIRECTION);
14174 extern __inline long long
14175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14176 _mm_cvttsd_i64 (__m128d __A)
14178 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
14179 _MM_FROUND_CUR_DIRECTION);
14181 #endif /* __x86_64__ */
14183 extern __inline unsigned
14184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14185 _mm_cvtsd_u32 (__m128d __A)
14187 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
14188 _MM_FROUND_CUR_DIRECTION);
14191 extern __inline unsigned
14192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14193 _mm_cvttsd_u32 (__m128d __A)
14195 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
14196 _MM_FROUND_CUR_DIRECTION);
14199 extern __inline int
14200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14201 _mm_cvttsd_i32 (__m128d __A)
14203 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
14204 _MM_FROUND_CUR_DIRECTION);
/* Packed width-changing conversions, each as the standard triple:
   plain (destination = _mm512/_mm256_undefined_*(), mask -1),
   merge-masked (destination = __W), zero-masked (destination =
   _*_setzero_*()).  NOTE(review): "142xx"/"143xx" tokens and the
   dropped '{'/'}' lines are extraction artifacts.  */
/* 8 x float (low __m256) widened to 8 x double.  */
14207 extern __inline __m512d
14208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14209 _mm512_cvtps_pd (__m256 __A)
14211 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14212 (__v8df)
14213 _mm512_undefined_pd (),
14214 (__mmask8) -1,
14215 _MM_FROUND_CUR_DIRECTION);
14218 extern __inline __m512d
14219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14220 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
14222 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14223 (__v8df) __W,
14224 (__mmask8) __U,
14225 _MM_FROUND_CUR_DIRECTION);
14228 extern __inline __m512d
14229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14230 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
14232 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
14233 (__v8df)
14234 _mm512_setzero_pd (),
14235 (__mmask8) __U,
14236 _MM_FROUND_CUR_DIRECTION);
/* 16 x half (carried in __m256i as __v16hi) widened to 16 x float.  */
14239 extern __inline __m512
14240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14241 _mm512_cvtph_ps (__m256i __A)
14243 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14244 (__v16sf)
14245 _mm512_undefined_ps (),
14246 (__mmask16) -1,
14247 _MM_FROUND_CUR_DIRECTION);
14250 extern __inline __m512
14251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14252 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
14254 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14255 (__v16sf) __W,
14256 (__mmask16) __U,
14257 _MM_FROUND_CUR_DIRECTION);
14260 extern __inline __m512
14261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14262 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
14264 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
14265 (__v16sf)
14266 _mm512_setzero_ps (),
14267 (__mmask16) __U,
14268 _MM_FROUND_CUR_DIRECTION);
/* 8 x double narrowed to 8 x float (__m256 result).  */
14271 extern __inline __m256
14272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14273 _mm512_cvtpd_ps (__m512d __A)
14275 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14276 (__v8sf)
14277 _mm256_undefined_ps (),
14278 (__mmask8) -1,
14279 _MM_FROUND_CUR_DIRECTION);
14282 extern __inline __m256
14283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14284 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
14286 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14287 (__v8sf) __W,
14288 (__mmask8) __U,
14289 _MM_FROUND_CUR_DIRECTION);
14292 extern __inline __m256
14293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14294 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
14296 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
14297 (__v8sf)
14298 _mm256_setzero_ps (),
14299 (__mmask8) __U,
14300 _MM_FROUND_CUR_DIRECTION);
/* __OPTIMIZE__ branch: getexp and getmant as real inline functions.
   getmant packs its two enum arguments into one immediate as
   (sign_ctl << 2) | norm_interval before handing it to the builtin.
   NOTE(review): "143xx"/"145xx" leading tokens and the dropped
   '{'/'}' lines are extraction artifacts of this dump.  */
14303 #ifdef __OPTIMIZE__
/* Packed getexp, 16 x float: plain / merge-mask / zero-mask.  */
14304 extern __inline __m512
14305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14306 _mm512_getexp_ps (__m512 __A)
14308 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14309 (__v16sf)
14310 _mm512_undefined_ps (),
14311 (__mmask16) -1,
14312 _MM_FROUND_CUR_DIRECTION);
14315 extern __inline __m512
14316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14317 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
14319 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14320 (__v16sf) __W,
14321 (__mmask16) __U,
14322 _MM_FROUND_CUR_DIRECTION);
14325 extern __inline __m512
14326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14327 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
14329 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
14330 (__v16sf)
14331 _mm512_setzero_ps (),
14332 (__mmask16) __U,
14333 _MM_FROUND_CUR_DIRECTION);
/* Packed getexp, 8 x double.  */
14336 extern __inline __m512d
14337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14338 _mm512_getexp_pd (__m512d __A)
14340 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14341 (__v8df)
14342 _mm512_undefined_pd (),
14343 (__mmask8) -1,
14344 _MM_FROUND_CUR_DIRECTION);
14347 extern __inline __m512d
14348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14349 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
14351 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14352 (__v8df) __W,
14353 (__mmask8) __U,
14354 _MM_FROUND_CUR_DIRECTION);
14357 extern __inline __m512d
14358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14359 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
14361 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
14362 (__v8df)
14363 _mm512_setzero_pd (),
14364 (__mmask8) __U,
14365 _MM_FROUND_CUR_DIRECTION);
/* Scalar getexp ss/sd.  The unmasked forms use the two-operand
   *_round builtin; the masked forms use *_mask_round.  */
14368 extern __inline __m128
14369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14370 _mm_getexp_ss (__m128 __A, __m128 __B)
14372 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
14373 (__v4sf) __B,
14374 _MM_FROUND_CUR_DIRECTION);
14377 extern __inline __m128
14378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14379 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
14381 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14382 (__v4sf) __B,
14383 (__v4sf) __W,
14384 (__mmask8) __U,
14385 _MM_FROUND_CUR_DIRECTION);
14388 extern __inline __m128
14389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14390 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
14392 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
14393 (__v4sf) __B,
14394 (__v4sf)
14395 _mm_setzero_ps (),
14396 (__mmask8) __U,
14397 _MM_FROUND_CUR_DIRECTION);
14400 extern __inline __m128d
14401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14402 _mm_getexp_sd (__m128d __A, __m128d __B)
14404 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
14405 (__v2df) __B,
14406 _MM_FROUND_CUR_DIRECTION);
14409 extern __inline __m128d
14410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14411 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
14413 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14414 (__v2df) __B,
14415 (__v2df) __W,
14416 (__mmask8) __U,
14417 _MM_FROUND_CUR_DIRECTION);
14420 extern __inline __m128d
14421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14422 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
14424 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
14425 (__v2df) __B,
14426 (__v2df)
14427 _mm_setzero_pd (),
14428 (__mmask8) __U,
14429 _MM_FROUND_CUR_DIRECTION);
/* Packed getmant: the immediate is (__C << 2) | __B, i.e. sign
   control in bits 3:2, normalization interval in bits 1:0.  */
14432 extern __inline __m512d
14433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14434 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
14435 _MM_MANTISSA_SIGN_ENUM __C)
14437 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14438 (__C << 2) | __B,
14439 _mm512_undefined_pd (),
14440 (__mmask8) -1,
14441 _MM_FROUND_CUR_DIRECTION);
14444 extern __inline __m512d
14445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14446 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
14447 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14449 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14450 (__C << 2) | __B,
14451 (__v8df) __W, __U,
14452 _MM_FROUND_CUR_DIRECTION);
14455 extern __inline __m512d
14456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14457 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
14458 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14460 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
14461 (__C << 2) | __B,
14462 (__v8df)
14463 _mm512_setzero_pd (),
14464 __U,
14465 _MM_FROUND_CUR_DIRECTION);
14468 extern __inline __m512
14469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14470 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
14471 _MM_MANTISSA_SIGN_ENUM __C)
14473 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14474 (__C << 2) | __B,
14475 _mm512_undefined_ps (),
14476 (__mmask16) -1,
14477 _MM_FROUND_CUR_DIRECTION);
14480 extern __inline __m512
14481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14482 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
14483 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14485 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14486 (__C << 2) | __B,
14487 (__v16sf) __W, __U,
14488 _MM_FROUND_CUR_DIRECTION);
14491 extern __inline __m512
14492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14493 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
14494 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
14496 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
14497 (__C << 2) | __B,
14498 (__v16sf)
14499 _mm512_setzero_ps (),
14500 __U,
14501 _MM_FROUND_CUR_DIRECTION);
/* Scalar getmant sd/ss; immediate packed as (__D << 2) | __C.  */
14504 extern __inline __m128d
14505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14506 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
14507 _MM_MANTISSA_SIGN_ENUM __D)
14509 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
14510 (__v2df) __B,
14511 (__D << 2) | __C,
14512 _MM_FROUND_CUR_DIRECTION);
14515 extern __inline __m128d
14516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14517 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
14518 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14520 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14521 (__v2df) __B,
14522 (__D << 2) | __C,
14523 (__v2df) __W,
14524 __U,
14525 _MM_FROUND_CUR_DIRECTION);
14528 extern __inline __m128d
14529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14530 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
14531 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14533 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
14534 (__v2df) __B,
14535 (__D << 2) | __C,
14536 (__v2df)
14537 _mm_setzero_pd(),
14538 __U,
14539 _MM_FROUND_CUR_DIRECTION);
14542 extern __inline __m128
14543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14544 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
14545 _MM_MANTISSA_SIGN_ENUM __D)
14547 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
14548 (__v4sf) __B,
14549 (__D << 2) | __C,
14550 _MM_FROUND_CUR_DIRECTION);
14553 extern __inline __m128
14554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14555 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
14556 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14558 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14559 (__v4sf) __B,
14560 (__D << 2) | __C,
14561 (__v4sf) __W,
14562 __U,
14563 _MM_FROUND_CUR_DIRECTION);
14566 extern __inline __m128
14567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14568 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
14569 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
14571 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
14572 (__v4sf) __B,
14573 (__D << 2) | __C,
14574 (__v4sf)
14575 _mm_setzero_ps(),
14576 __U,
14577 _MM_FROUND_CUR_DIRECTION);
/* Non-__OPTIMIZE__ branch: macro forms of getmant and getexp.
   NOTE(review): the four scalar getexp mask macros
   (_mm_mask_getexp_ss/_mm_maskz_getexp_ss and the _sd pair) pass
   A, B, W, U straight through WITHOUT the (__v4sf)(__m128)/(int)
   casts that every sibling macro in this file applies — inconsistent
   with the rest of the branch; verify against the current upstream
   header.  Leading "146xx" tokens are extraction artifacts.  */
14581 #define _mm512_getmant_pd(X, B, C) \
14582 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
14583 (int)(((C)<<2) | (B)), \
14584 (__v8df)_mm512_undefined_pd(), \
14585 (__mmask8)-1,\
14586 _MM_FROUND_CUR_DIRECTION))
14588 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
14589 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
14590 (int)(((C)<<2) | (B)), \
14591 (__v8df)(__m512d)(W), \
14592 (__mmask8)(U),\
14593 _MM_FROUND_CUR_DIRECTION))
14595 #define _mm512_maskz_getmant_pd(U, X, B, C) \
14596 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
14597 (int)(((C)<<2) | (B)), \
14598 (__v8df)_mm512_setzero_pd(), \
14599 (__mmask8)(U),\
14600 _MM_FROUND_CUR_DIRECTION))
14601 #define _mm512_getmant_ps(X, B, C) \
14602 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
14603 (int)(((C)<<2) | (B)), \
14604 (__v16sf)_mm512_undefined_ps(), \
14605 (__mmask16)-1,\
14606 _MM_FROUND_CUR_DIRECTION))
14608 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
14609 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
14610 (int)(((C)<<2) | (B)), \
14611 (__v16sf)(__m512)(W), \
14612 (__mmask16)(U),\
14613 _MM_FROUND_CUR_DIRECTION))
14615 #define _mm512_maskz_getmant_ps(U, X, B, C) \
14616 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
14617 (int)(((C)<<2) | (B)), \
14618 (__v16sf)_mm512_setzero_ps(), \
14619 (__mmask16)(U),\
14620 _MM_FROUND_CUR_DIRECTION))
14621 #define _mm_getmant_sd(X, Y, C, D) \
14622 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
14623 (__v2df)(__m128d)(Y), \
14624 (int)(((D)<<2) | (C)), \
14625 _MM_FROUND_CUR_DIRECTION))
14627 #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
14628 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
14629 (__v2df)(__m128d)(Y), \
14630 (int)(((D)<<2) | (C)), \
14631 (__v2df)(__m128d)(W), \
14632 (__mmask8)(U),\
14633 _MM_FROUND_CUR_DIRECTION))
14635 #define _mm_maskz_getmant_sd(U, X, Y, C, D) \
14636 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
14637 (__v2df)(__m128d)(Y), \
14638 (int)(((D)<<2) | (C)), \
14639 (__v2df)_mm_setzero_pd(), \
14640 (__mmask8)(U),\
14641 _MM_FROUND_CUR_DIRECTION))
14643 #define _mm_getmant_ss(X, Y, C, D) \
14644 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
14645 (__v4sf)(__m128)(Y), \
14646 (int)(((D)<<2) | (C)), \
14647 _MM_FROUND_CUR_DIRECTION))
14649 #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
14650 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
14651 (__v4sf)(__m128)(Y), \
14652 (int)(((D)<<2) | (C)), \
14653 (__v4sf)(__m128)(W), \
14654 (__mmask8)(U),\
14655 _MM_FROUND_CUR_DIRECTION))
14657 #define _mm_maskz_getmant_ss(U, X, Y, C, D) \
14658 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
14659 (__v4sf)(__m128)(Y), \
14660 (int)(((D)<<2) | (C)), \
14661 (__v4sf)_mm_setzero_ps(), \
14662 (__mmask8)(U),\
14663 _MM_FROUND_CUR_DIRECTION))
14665 #define _mm_getexp_ss(A, B) \
14666 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
14667 _MM_FROUND_CUR_DIRECTION))
/* See NOTE(review) above: no argument casts on the next four.  */
14669 #define _mm_mask_getexp_ss(W, U, A, B) \
14670 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
14671 _MM_FROUND_CUR_DIRECTION)
14673 #define _mm_maskz_getexp_ss(U, A, B) \
14674 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
14675 _MM_FROUND_CUR_DIRECTION)
14677 #define _mm_getexp_sd(A, B) \
14678 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
14679 _MM_FROUND_CUR_DIRECTION))
14681 #define _mm_mask_getexp_sd(W, U, A, B) \
14682 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
14683 _MM_FROUND_CUR_DIRECTION)
14685 #define _mm_maskz_getexp_sd(U, A, B) \
14686 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
14687 _MM_FROUND_CUR_DIRECTION)
14689 #define _mm512_getexp_ps(A) \
14690 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
14691 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
14693 #define _mm512_mask_getexp_ps(W, U, A) \
14694 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
14695 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
14697 #define _mm512_maskz_getexp_ps(U, A) \
14698 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
14699 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
14701 #define _mm512_getexp_pd(A) \
14702 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
14703 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
14705 #define _mm512_mask_getexp_pd(W, U, A) \
14706 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
14707 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14709 #define _mm512_maskz_getexp_pd(U, A) \
14710 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
14711 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14712 #endif
/* __OPTIMIZE__ branch: roundscale inline functions.
   NOTE(review): original lines 14722 and 14756 — the (__mmask16)-1 /
   (__mmask8)-1 mask arguments of the two unmasked packed forms — are
   absent from this extraction (the line numbers jump 14721->14723 and
   14755->14757), just as the '{'/'}' lines are.  The builtin calls
   below are therefore textually incomplete here, not in the real
   header; verify against upstream before editing.  */
14714 #ifdef __OPTIMIZE__
14715 extern __inline __m512
14716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14717 _mm512_roundscale_ps (__m512 __A, const int __imm)
14719 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
14720 (__v16sf)
14721 _mm512_undefined_ps (),
14723 _MM_FROUND_CUR_DIRECTION);
14726 extern __inline __m512
14727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14728 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
14729 const int __imm)
14731 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
14732 (__v16sf) __A,
14733 (__mmask16) __B,
14734 _MM_FROUND_CUR_DIRECTION);
14737 extern __inline __m512
14738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14739 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
14741 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
14742 __imm,
14743 (__v16sf)
14744 _mm512_setzero_ps (),
14745 (__mmask16) __A,
14746 _MM_FROUND_CUR_DIRECTION);
/* Double-precision packed forms.  */
14749 extern __inline __m512d
14750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14751 _mm512_roundscale_pd (__m512d __A, const int __imm)
14753 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
14754 (__v8df)
14755 _mm512_undefined_pd (),
14757 _MM_FROUND_CUR_DIRECTION);
14760 extern __inline __m512d
14761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14762 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
14763 const int __imm)
14765 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
14766 (__v8df) __A,
14767 (__mmask8) __B,
14768 _MM_FROUND_CUR_DIRECTION);
14771 extern __inline __m512d
14772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14773 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
14775 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
14776 __imm,
14777 (__v8df)
14778 _mm512_setzero_pd (),
14779 (__mmask8) __A,
14780 _MM_FROUND_CUR_DIRECTION);
/* Scalar ss/sd forms: two-operand *_round builtins, no mask.  */
14783 extern __inline __m128
14784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14785 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
14787 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
14788 (__v4sf) __B, __imm,
14789 _MM_FROUND_CUR_DIRECTION);
14792 extern __inline __m128d
14793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14794 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
14796 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
14797 (__v2df) __B, __imm,
14798 _MM_FROUND_CUR_DIRECTION);
/* Non-__OPTIMIZE__ branch: roundscale macro forms, each mirroring the
   inline function of the same name; here the unmasked packed forms do
   show their (__mmask16)(-1) / (__mmask8)(-1) arguments.
   Leading "148xx" tokens are extraction artifacts.  */
14801 #else
14802 #define _mm512_roundscale_ps(A, B) \
14803 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
14804 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
14805 #define _mm512_mask_roundscale_ps(A, B, C, D) \
14806 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
14807 (int)(D), \
14808 (__v16sf)(__m512)(A), \
14809 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
14810 #define _mm512_maskz_roundscale_ps(A, B, C) \
14811 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
14812 (int)(C), \
14813 (__v16sf)_mm512_setzero_ps(),\
14814 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
14815 #define _mm512_roundscale_pd(A, B) \
14816 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
14817 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
14818 #define _mm512_mask_roundscale_pd(A, B, C, D) \
14819 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
14820 (int)(D), \
14821 (__v8df)(__m512d)(A), \
14822 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
14823 #define _mm512_maskz_roundscale_pd(A, B, C) \
14824 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
14825 (int)(C), \
14826 (__v8df)_mm512_setzero_pd(),\
14827 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
14828 #define _mm_roundscale_ss(A, B, C) \
14829 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
14830 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
14831 #define _mm_roundscale_sd(A, B, C) \
14832 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
14833 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
14834 #endif
/* __OPTIMIZE__ branch: 512-bit FP comparisons returning mask
   registers.  The generic _mm512[_mask]_cmp_{pd,ps}_mask take the
   predicate __P as an immediate; the named variants below fix the
   predicate to a _CMP_* constant and come in unmasked (-1) and
   masked (__U) pairs.  NOTE(review): "148xx"/"150xx" tokens and the
   dropped '{'/'}' lines are extraction artifacts of this dump.  */
14836 #ifdef __OPTIMIZE__
14837 extern __inline __mmask8
14838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14839 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
14841 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14842 (__v8df) __Y, __P,
14843 (__mmask8) -1,
14844 _MM_FROUND_CUR_DIRECTION);
14847 extern __inline __mmask16
14848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14849 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
14851 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14852 (__v16sf) __Y, __P,
14853 (__mmask16) -1,
14854 _MM_FROUND_CUR_DIRECTION);
14857 extern __inline __mmask16
14858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14859 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
14861 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14862 (__v16sf) __Y, __P,
14863 (__mmask16) __U,
14864 _MM_FROUND_CUR_DIRECTION);
14867 extern __inline __mmask8
14868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14869 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
14871 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14872 (__v8df) __Y, __P,
14873 (__mmask8) __U,
14874 _MM_FROUND_CUR_DIRECTION);
/* eq: ordered, non-signaling (_CMP_EQ_OQ).  */
14877 extern __inline __mmask8
14878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14879 _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
14881 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14882 (__v8df) __Y, _CMP_EQ_OQ,
14883 (__mmask8) -1,
14884 _MM_FROUND_CUR_DIRECTION);
14887 extern __inline __mmask8
14888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14889 _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14891 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14892 (__v8df) __Y, _CMP_EQ_OQ,
14893 (__mmask8) __U,
14894 _MM_FROUND_CUR_DIRECTION);
/* lt / le: ordered, signaling (_CMP_LT_OS / _CMP_LE_OS).  */
14897 extern __inline __mmask8
14898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14899 _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
14901 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14902 (__v8df) __Y, _CMP_LT_OS,
14903 (__mmask8) -1,
14904 _MM_FROUND_CUR_DIRECTION);
14907 extern __inline __mmask8
14908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14909 _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14911 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14912 (__v8df) __Y, _CMP_LT_OS,
14913 (__mmask8) __U,
14914 _MM_FROUND_CUR_DIRECTION);
14917 extern __inline __mmask8
14918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14919 _mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
14921 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14922 (__v8df) __Y, _CMP_LE_OS,
14923 (__mmask8) -1,
14924 _MM_FROUND_CUR_DIRECTION);
14927 extern __inline __mmask8
14928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14929 _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14931 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14932 (__v8df) __Y, _CMP_LE_OS,
14933 (__mmask8) __U,
14934 _MM_FROUND_CUR_DIRECTION);
/* unord / neq: unordered-quiet predicates.  */
14937 extern __inline __mmask8
14938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14939 _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
14941 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14942 (__v8df) __Y, _CMP_UNORD_Q,
14943 (__mmask8) -1,
14944 _MM_FROUND_CUR_DIRECTION);
14947 extern __inline __mmask8
14948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14949 _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14951 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14952 (__v8df) __Y, _CMP_UNORD_Q,
14953 (__mmask8) __U,
14954 _MM_FROUND_CUR_DIRECTION);
14957 extern __inline __mmask8
14958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14959 _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
14961 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14962 (__v8df) __Y, _CMP_NEQ_UQ,
14963 (__mmask8) -1,
14964 _MM_FROUND_CUR_DIRECTION);
14967 extern __inline __mmask8
14968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14969 _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14971 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14972 (__v8df) __Y, _CMP_NEQ_UQ,
14973 (__mmask8) __U,
14974 _MM_FROUND_CUR_DIRECTION);
/* nlt / nle: unordered, signaling (_CMP_NLT_US / _CMP_NLE_US).  */
14977 extern __inline __mmask8
14978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14979 _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
14981 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14982 (__v8df) __Y, _CMP_NLT_US,
14983 (__mmask8) -1,
14984 _MM_FROUND_CUR_DIRECTION);
14987 extern __inline __mmask8
14988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14989 _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14991 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14992 (__v8df) __Y, _CMP_NLT_US,
14993 (__mmask8) __U,
14994 _MM_FROUND_CUR_DIRECTION);
14997 extern __inline __mmask8
14998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14999 _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
15001 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15002 (__v8df) __Y, _CMP_NLE_US,
15003 (__mmask8) -1,
15004 _MM_FROUND_CUR_DIRECTION);
15007 extern __inline __mmask8
15008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15009 _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15011 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15012 (__v8df) __Y, _CMP_NLE_US,
15013 (__mmask8) __U,
15014 _MM_FROUND_CUR_DIRECTION);
/* ord: both operands non-NaN (_CMP_ORD_Q).  */
15017 extern __inline __mmask8
15018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15019 _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
15021 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15022 (__v8df) __Y, _CMP_ORD_Q,
15023 (__mmask8) -1,
15024 _MM_FROUND_CUR_DIRECTION);
15027 extern __inline __mmask8
15028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15029 _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15031 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15032 (__v8df) __Y, _CMP_ORD_Q,
15033 (__mmask8) __U,
15034 _MM_FROUND_CUR_DIRECTION);
15037 extern __inline __mmask16
15038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15039 _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
15041 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15042 (__v16sf) __Y, _CMP_EQ_OQ,
15043 (__mmask16) -1,
15044 _MM_FROUND_CUR_DIRECTION);
15047 extern __inline __mmask16
15048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15049 _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15051 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15052 (__v16sf) __Y, _CMP_EQ_OQ,
15053 (__mmask16) __U,
15054 _MM_FROUND_CUR_DIRECTION);
15057 extern __inline __mmask16
15058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15059 _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
15061 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15062 (__v16sf) __Y, _CMP_LT_OS,
15063 (__mmask16) -1,
15064 _MM_FROUND_CUR_DIRECTION);
15067 extern __inline __mmask16
15068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15069 _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15071 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15072 (__v16sf) __Y, _CMP_LT_OS,
15073 (__mmask16) __U,
15074 _MM_FROUND_CUR_DIRECTION);
15077 extern __inline __mmask16
15078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15079 _mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
15081 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15082 (__v16sf) __Y, _CMP_LE_OS,
15083 (__mmask16) -1,
15084 _MM_FROUND_CUR_DIRECTION);
15087 extern __inline __mmask16
15088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15089 _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15091 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15092 (__v16sf) __Y, _CMP_LE_OS,
15093 (__mmask16) __U,
15094 _MM_FROUND_CUR_DIRECTION);
15097 extern __inline __mmask16
15098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15099 _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
15101 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15102 (__v16sf) __Y, _CMP_UNORD_Q,
15103 (__mmask16) -1,
15104 _MM_FROUND_CUR_DIRECTION);
15107 extern __inline __mmask16
15108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15109 _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15111 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15112 (__v16sf) __Y, _CMP_UNORD_Q,
15113 (__mmask16) __U,
15114 _MM_FROUND_CUR_DIRECTION);
15117 extern __inline __mmask16
15118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15119 _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
15121 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15122 (__v16sf) __Y, _CMP_NEQ_UQ,
15123 (__mmask16) -1,
15124 _MM_FROUND_CUR_DIRECTION);
15127 extern __inline __mmask16
15128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15129 _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15131 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15132 (__v16sf) __Y, _CMP_NEQ_UQ,
15133 (__mmask16) __U,
15134 _MM_FROUND_CUR_DIRECTION);
15137 extern __inline __mmask16
15138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15139 _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
15141 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15142 (__v16sf) __Y, _CMP_NLT_US,
15143 (__mmask16) -1,
15144 _MM_FROUND_CUR_DIRECTION);
15147 extern __inline __mmask16
15148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15149 _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15151 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15152 (__v16sf) __Y, _CMP_NLT_US,
15153 (__mmask16) __U,
15154 _MM_FROUND_CUR_DIRECTION);
15157 extern __inline __mmask16
15158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15159 _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
15161 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15162 (__v16sf) __Y, _CMP_NLE_US,
15163 (__mmask16) -1,
15164 _MM_FROUND_CUR_DIRECTION);
15167 extern __inline __mmask16
15168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15169 _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15171 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15172 (__v16sf) __Y, _CMP_NLE_US,
15173 (__mmask16) __U,
15174 _MM_FROUND_CUR_DIRECTION);
15177 extern __inline __mmask16
15178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15179 _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
15181 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15182 (__v16sf) __Y, _CMP_ORD_Q,
15183 (__mmask16) -1,
15184 _MM_FROUND_CUR_DIRECTION);
15187 extern __inline __mmask16
15188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15189 _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15191 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15192 (__v16sf) __Y, _CMP_ORD_Q,
15193 (__mmask16) __U,
15194 _MM_FROUND_CUR_DIRECTION);
15197 extern __inline __mmask8
15198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15199 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
15201 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15202 (__v2df) __Y, __P,
15203 (__mmask8) -1,
15204 _MM_FROUND_CUR_DIRECTION);
15207 extern __inline __mmask8
15208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15209 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
15211 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
15212 (__v2df) __Y, __P,
15213 (__mmask8) __M,
15214 _MM_FROUND_CUR_DIRECTION);
15217 extern __inline __mmask8
15218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15219 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
15221 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15222 (__v4sf) __Y, __P,
15223 (__mmask8) -1,
15224 _MM_FROUND_CUR_DIRECTION);
15227 extern __inline __mmask8
15228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15229 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
15231 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
15232 (__v4sf) __Y, __P,
15233 (__mmask8) __M,
15234 _MM_FROUND_CUR_DIRECTION);
15237 #else
15238 #define _mm512_cmp_pd_mask(X, Y, P) \
15239 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
15240 (__v8df)(__m512d)(Y), (int)(P),\
15241 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
15243 #define _mm512_cmp_ps_mask(X, Y, P) \
15244 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
15245 (__v16sf)(__m512)(Y), (int)(P),\
15246 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
15248 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
15249 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
15250 (__v8df)(__m512d)(Y), (int)(P),\
15251 (__mmask8)M, _MM_FROUND_CUR_DIRECTION))
15253 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
15254 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
15255 (__v16sf)(__m512)(Y), (int)(P),\
15256 (__mmask16)M,_MM_FROUND_CUR_DIRECTION))
15258 #define _mm_cmp_sd_mask(X, Y, P) \
15259 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
15260 (__v2df)(__m128d)(Y), (int)(P),\
15261 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
15263 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
15264 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
15265 (__v2df)(__m128d)(Y), (int)(P),\
15266 M,_MM_FROUND_CUR_DIRECTION))
15268 #define _mm_cmp_ss_mask(X, Y, P) \
15269 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
15270 (__v4sf)(__m128)(Y), (int)(P), \
15271 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
15273 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
15274 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
15275 (__v4sf)(__m128)(Y), (int)(P), \
15276 M,_MM_FROUND_CUR_DIRECTION))
15277 #endif
15279 extern __inline __mmask16
15280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15281 _mm512_kmov (__mmask16 __A)
15283 return __builtin_ia32_kmovw (__A);
15286 extern __inline __m512
15287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15288 _mm512_castpd_ps (__m512d __A)
15290 return (__m512) (__A);
15293 extern __inline __m512i
15294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15295 _mm512_castpd_si512 (__m512d __A)
15297 return (__m512i) (__A);
15300 extern __inline __m512d
15301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15302 _mm512_castps_pd (__m512 __A)
15304 return (__m512d) (__A);
15307 extern __inline __m512i
15308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15309 _mm512_castps_si512 (__m512 __A)
15311 return (__m512i) (__A);
15314 extern __inline __m512
15315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15316 _mm512_castsi512_ps (__m512i __A)
15318 return (__m512) (__A);
15321 extern __inline __m512d
15322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15323 _mm512_castsi512_pd (__m512i __A)
15325 return (__m512d) (__A);
15328 extern __inline __m128d
15329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15330 _mm512_castpd512_pd128 (__m512d __A)
15332 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
15335 extern __inline __m128
15336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15337 _mm512_castps512_ps128 (__m512 __A)
15339 return _mm512_extractf32x4_ps(__A, 0);
15342 extern __inline __m128i
15343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15344 _mm512_castsi512_si128 (__m512i __A)
15346 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
15349 extern __inline __m256d
15350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15351 _mm512_castpd512_pd256 (__m512d __A)
15353 return _mm512_extractf64x4_pd(__A, 0);
15356 extern __inline __m256
15357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15358 _mm512_castps512_ps256 (__m512 __A)
15360 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
15363 extern __inline __m256i
15364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15365 _mm512_castsi512_si256 (__m512i __A)
15367 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
15370 extern __inline __m512d
15371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15372 _mm512_castpd128_pd512 (__m128d __A)
15374 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
15377 extern __inline __m512
15378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15379 _mm512_castps128_ps512 (__m128 __A)
15381 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
15384 extern __inline __m512i
15385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15386 _mm512_castsi128_si512 (__m128i __A)
15388 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
15391 extern __inline __m512d
15392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15393 _mm512_castpd256_pd512 (__m256d __A)
15395 return __builtin_ia32_pd512_256pd (__A);
15398 extern __inline __m512
15399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15400 _mm512_castps256_ps512 (__m256 __A)
15402 return __builtin_ia32_ps512_256ps (__A);
15405 extern __inline __m512i
15406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15407 _mm512_castsi256_si512 (__m256i __A)
15409 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
15412 extern __inline __mmask16
15413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15414 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
15416 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15417 (__v16si) __B, 0,
15418 (__mmask16) -1);
15421 extern __inline __mmask16
15422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15423 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15425 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15426 (__v16si) __B, 0, __U);
15429 extern __inline __mmask8
15430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15431 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15433 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15434 (__v8di) __B, 0, __U);
15437 extern __inline __mmask8
15438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15439 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
15441 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15442 (__v8di) __B, 0,
15443 (__mmask8) -1);
15446 extern __inline __mmask16
15447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15448 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
15450 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15451 (__v16si) __B, 6,
15452 (__mmask16) -1);
15455 extern __inline __mmask16
15456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15457 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
15459 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
15460 (__v16si) __B, 6, __U);
15463 extern __inline __mmask8
15464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15465 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
15467 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15468 (__v8di) __B, 6, __U);
15471 extern __inline __mmask8
15472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15473 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
15475 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
15476 (__v8di) __B, 6,
15477 (__mmask8) -1);
15480 #undef __MM512_REDUCE_OP
15481 #define __MM512_REDUCE_OP(op) \
15482 __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
15483 __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
15484 __m256i __T3 = (__m256i) (__T1 op __T2); \
15485 __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
15486 __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
15487 __v4si __T6 = __T4 op __T5; \
15488 __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15489 __v4si __T8 = __T6 op __T7; \
15490 return __T8[0] op __T8[1]
15492 extern __inline int
15493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15494 _mm512_reduce_add_epi32 (__m512i __A)
15496 __MM512_REDUCE_OP (+);
15499 extern __inline int
15500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15501 _mm512_reduce_mul_epi32 (__m512i __A)
15503 __MM512_REDUCE_OP (*);
15506 extern __inline int
15507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15508 _mm512_reduce_and_epi32 (__m512i __A)
15510 __MM512_REDUCE_OP (&);
15513 extern __inline int
15514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15515 _mm512_reduce_or_epi32 (__m512i __A)
15517 __MM512_REDUCE_OP (|);
15520 extern __inline int
15521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15522 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
15524 __A = _mm512_maskz_mov_epi32 (__U, __A);
15525 __MM512_REDUCE_OP (+);
15528 extern __inline int
15529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15530 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
15532 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
15533 __MM512_REDUCE_OP (*);
15536 extern __inline int
15537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15538 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
15540 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
15541 __MM512_REDUCE_OP (&);
15544 extern __inline int
15545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15546 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
15548 __A = _mm512_maskz_mov_epi32 (__U, __A);
15549 __MM512_REDUCE_OP (|);
15552 #undef __MM512_REDUCE_OP
15553 #define __MM512_REDUCE_OP(op) \
15554 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
15555 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
15556 __m256i __T3 = _mm256_##op (__T1, __T2); \
15557 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
15558 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
15559 __m128i __T6 = _mm_##op (__T4, __T5); \
15560 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
15561 (__v4si) { 2, 3, 0, 1 }); \
15562 __m128i __T8 = _mm_##op (__T6, __T7); \
15563 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
15564 (__v4si) { 1, 0, 1, 0 }); \
15565 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
15566 return __T10[0]
15568 extern __inline int
15569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15570 _mm512_reduce_min_epi32 (__m512i __A)
15572 __MM512_REDUCE_OP (min_epi32);
15575 extern __inline int
15576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15577 _mm512_reduce_max_epi32 (__m512i __A)
15579 __MM512_REDUCE_OP (max_epi32);
15582 extern __inline unsigned int
15583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15584 _mm512_reduce_min_epu32 (__m512i __A)
15586 __MM512_REDUCE_OP (min_epu32);
15589 extern __inline unsigned int
15590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15591 _mm512_reduce_max_epu32 (__m512i __A)
15593 __MM512_REDUCE_OP (max_epu32);
15596 extern __inline int
15597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15598 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
15600 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
15601 __MM512_REDUCE_OP (min_epi32);
15604 extern __inline int
15605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15606 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
15608 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
15609 __MM512_REDUCE_OP (max_epi32);
15612 extern __inline unsigned int
15613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15614 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
15616 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
15617 __MM512_REDUCE_OP (min_epu32);
15620 extern __inline unsigned int
15621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15622 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
15624 __A = _mm512_maskz_mov_epi32 (__U, __A);
15625 __MM512_REDUCE_OP (max_epu32);
15628 #undef __MM512_REDUCE_OP
15629 #define __MM512_REDUCE_OP(op) \
15630 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
15631 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
15632 __m256 __T3 = __T1 op __T2; \
15633 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
15634 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
15635 __m128 __T6 = __T4 op __T5; \
15636 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15637 __m128 __T8 = __T6 op __T7; \
15638 return __T8[0] op __T8[1]
15640 extern __inline float
15641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15642 _mm512_reduce_add_ps (__m512 __A)
15644 __MM512_REDUCE_OP (+);
15647 extern __inline float
15648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15649 _mm512_reduce_mul_ps (__m512 __A)
15651 __MM512_REDUCE_OP (*);
15654 extern __inline float
15655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15656 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
15658 __A = _mm512_maskz_mov_ps (__U, __A);
15659 __MM512_REDUCE_OP (+);
15662 extern __inline float
15663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15664 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
15666 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
15667 __MM512_REDUCE_OP (*);
15670 #undef __MM512_REDUCE_OP
15671 #define __MM512_REDUCE_OP(op) \
15672 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
15673 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
15674 __m256 __T3 = _mm256_##op (__T1, __T2); \
15675 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
15676 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
15677 __m128 __T6 = _mm_##op (__T4, __T5); \
15678 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15679 __m128 __T8 = _mm_##op (__T6, __T7); \
15680 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
15681 __m128 __T10 = _mm_##op (__T8, __T9); \
15682 return __T10[0]
15684 extern __inline float
15685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15686 _mm512_reduce_min_ps (__m512 __A)
15688 __MM512_REDUCE_OP (min_ps);
15691 extern __inline float
15692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15693 _mm512_reduce_max_ps (__m512 __A)
15695 __MM512_REDUCE_OP (max_ps);
15698 extern __inline float
15699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15700 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
15702 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
15703 __MM512_REDUCE_OP (min_ps);
15706 extern __inline float
15707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15708 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
15710 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
15711 __MM512_REDUCE_OP (max_ps);
15714 #undef __MM512_REDUCE_OP
15715 #define __MM512_REDUCE_OP(op) \
15716 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
15717 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
15718 __m256i __T3 = (__m256i) (__T1 op __T2); \
15719 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
15720 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
15721 __v2di __T6 = __T4 op __T5; \
15722 return __T6[0] op __T6[1]
15724 extern __inline long long
15725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15726 _mm512_reduce_add_epi64 (__m512i __A)
15728 __MM512_REDUCE_OP (+);
15731 extern __inline long long
15732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15733 _mm512_reduce_mul_epi64 (__m512i __A)
15735 __MM512_REDUCE_OP (*);
15738 extern __inline long long
15739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15740 _mm512_reduce_and_epi64 (__m512i __A)
15742 __MM512_REDUCE_OP (&);
15745 extern __inline long long
15746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15747 _mm512_reduce_or_epi64 (__m512i __A)
15749 __MM512_REDUCE_OP (|);
15752 extern __inline long long
15753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15754 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
15756 __A = _mm512_maskz_mov_epi64 (__U, __A);
15757 __MM512_REDUCE_OP (+);
15760 extern __inline long long
15761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15762 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
15764 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
15765 __MM512_REDUCE_OP (*);
15768 extern __inline long long
15769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15770 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
15772 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15773 __MM512_REDUCE_OP (&);
15776 extern __inline long long
15777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15778 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
15780 __A = _mm512_maskz_mov_epi64 (__U, __A);
15781 __MM512_REDUCE_OP (|);
15784 #undef __MM512_REDUCE_OP
15785 #define __MM512_REDUCE_OP(op) \
15786 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
15787 __m512i __T2 = _mm512_##op (__A, __T1); \
15788 __m512i __T3 \
15789 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
15790 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
15791 __m512i __T4 = _mm512_##op (__T2, __T3); \
15792 __m512i __T5 \
15793 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
15794 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
15795 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
15796 return __T6[0]
15798 extern __inline long long
15799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15800 _mm512_reduce_min_epi64 (__m512i __A)
15802 __MM512_REDUCE_OP (min_epi64);
15805 extern __inline long long
15806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15807 _mm512_reduce_max_epi64 (__m512i __A)
15809 __MM512_REDUCE_OP (max_epi64);
15812 extern __inline long long
15813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15814 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
15816 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
15817 __U, __A);
15818 __MM512_REDUCE_OP (min_epi64);
15821 extern __inline long long
15822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15823 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
15825 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
15826 __U, __A);
15827 __MM512_REDUCE_OP (max_epi64);
15830 extern __inline unsigned long long
15831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15832 _mm512_reduce_min_epu64 (__m512i __A)
15834 __MM512_REDUCE_OP (min_epu64);
15837 extern __inline unsigned long long
15838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15839 _mm512_reduce_max_epu64 (__m512i __A)
15841 __MM512_REDUCE_OP (max_epu64);
15844 extern __inline unsigned long long
15845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15846 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
15848 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15849 __MM512_REDUCE_OP (min_epu64);
15852 extern __inline unsigned long long
15853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15854 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
15856 __A = _mm512_maskz_mov_epi64 (__U, __A);
15857 __MM512_REDUCE_OP (max_epu64);
15860 #undef __MM512_REDUCE_OP
15861 #define __MM512_REDUCE_OP(op) \
15862 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
15863 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
15864 __m256d __T3 = __T1 op __T2; \
15865 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
15866 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
15867 __m128d __T6 = __T4 op __T5; \
15868 return __T6[0] op __T6[1]
15870 extern __inline double
15871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15872 _mm512_reduce_add_pd (__m512d __A)
15874 __MM512_REDUCE_OP (+);
15877 extern __inline double
15878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15879 _mm512_reduce_mul_pd (__m512d __A)
15881 __MM512_REDUCE_OP (*);
15884 extern __inline double
15885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15886 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
15888 __A = _mm512_maskz_mov_pd (__U, __A);
15889 __MM512_REDUCE_OP (+);
15892 extern __inline double
15893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15894 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
15896 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
15897 __MM512_REDUCE_OP (*);
15900 #undef __MM512_REDUCE_OP
15901 #define __MM512_REDUCE_OP(op) \
15902 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
15903 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
15904 __m256d __T3 = _mm256_##op (__T1, __T2); \
15905 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
15906 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
15907 __m128d __T6 = _mm_##op (__T4, __T5); \
15908 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
15909 __m128d __T8 = _mm_##op (__T6, __T7); \
15910 return __T8[0]
15912 extern __inline double
15913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15914 _mm512_reduce_min_pd (__m512d __A)
15916 __MM512_REDUCE_OP (min_pd);
15919 extern __inline double
15920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15921 _mm512_reduce_max_pd (__m512d __A)
15923 __MM512_REDUCE_OP (max_pd);
15926 extern __inline double
15927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15928 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
15930 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
15931 __MM512_REDUCE_OP (min_pd);
15934 extern __inline double
15935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15936 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
15938 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
15939 __MM512_REDUCE_OP (max_pd);
15942 #undef __MM512_REDUCE_OP
15944 #ifdef __DISABLE_AVX512F__
15945 #undef __DISABLE_AVX512F__
15946 #pragma GCC pop_options
15947 #endif /* __DISABLE_AVX512F__ */
15949 #endif /* _AVX512FINTRIN_H_INCLUDED */