PR target/87674
[official-gcc.git] / gcc / config / i386 / avx512fintrin.h
blob001d610bc814c2f6018cd92771f6f80cba00d4cb
1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

/* 8-bit and 16-bit write-mask types used by the masked intrinsics:
   one mask bit per vector element.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
/* Reinterpret the low 16 bits of __M as a 16-bit write mask.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_int2mask (int __M)
{
  return (__mmask16) __M;
}

/* Reinterpret the 16-bit write mask __M as an int (zero-extended).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2int (__mmask16 __M)
{
  return (int) __M;
}
/* Create the vector [A B C D E F G H].  Arguments are given from the
   highest element down; the initializer lists them from element 0
   (= __H) upward, matching little-endian element order.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
		  long long __D, long long __E, long long __F,
		  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
	 { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H I J K L M N O P].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
		  int __E, int __F, int __G, int __H,
		  int __I, int __J, int __K, int __L,
		  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
	 { __P, __O, __N, __M, __L, __K, __J, __I,
	   __H, __G, __F, __E, __D, __C, __B, __A };
}
/* Create a vector of 32 shorts; __q31 is the highest element, __q00
   the lowest.  The initializer lists elements in memory order.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
		  short __q27, short __q26, short __q25, short __q24,
		  short __q23, short __q22, short __q21, short __q20,
		  short __q19, short __q18, short __q17, short __q16,
		  short __q15, short __q14, short __q13, short __q12,
		  short __q11, short __q10, short __q09, short __q08,
		  short __q07, short __q06, short __q05, short __q04,
		  short __q03, short __q02, short __q01, short __q00)
{
  return __extension__ (__m512i)(__v32hi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  };
}

/* Create a vector of 64 chars; __q63 is the highest element, __q00
   the lowest.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
		 char __q59, char __q58, char __q57, char __q56,
		 char __q55, char __q54, char __q53, char __q52,
		 char __q51, char __q50, char __q49, char __q48,
		 char __q47, char __q46, char __q45, char __q44,
		 char __q43, char __q42, char __q41, char __q40,
		 char __q39, char __q38, char __q37, char __q36,
		 char __q35, char __q34, char __q33, char __q32,
		 char __q31, char __q30, char __q29, char __q28,
		 char __q27, char __q26, char __q25, char __q24,
		 char __q23, char __q22, char __q21, char __q20,
		 char __q19, char __q18, char __q17, char __q16,
		 char __q15, char __q14, char __q13, char __q12,
		 char __q11, char __q10, char __q09, char __q08,
		 char __q07, char __q06, char __q05, char __q04,
		 char __q03, char __q02, char __q01, char __q00)
{
  return __extension__ (__m512i)(__v64qi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
    __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
    __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
    __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
    __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
  };
}
/* Create the vector [A B C D E F G H] of doubles; __H is element 0.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
	       double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
	 { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H I J K L M N O P] of floats;
   __P is element 0.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
	       float __E, float __F, float __G, float __H,
	       float __I, float __J, float __K, float __L,
	       float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
	 { __P, __O, __N, __M, __L, __K, __J, __I,
	   __H, __G, __F, __E, __D, __C, __B, __A };
}
/* "setr" (reversed) variants: arguments are given from element 0
   upward, so they simply forward to the "set" forms with the
   argument list reversed.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			      \
			  e8,e9,e10,e11,e12,e13,e14,e15)		      \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
/* Return a 512-bit vector with unspecified contents.  The self-
   initialization "__Y = __Y" is a deliberate idiom: it tells the
   compiler the value is "defined" without emitting any code, so no
   uninitialized-use warning escapes the header.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ps (void)
{
  __m512 __Y = __Y;
  return __Y;
}

#define _mm512_undefined _mm512_undefined_ps

/* Double-precision flavour of the same idiom.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_pd (void)
{
  __m512d __Y = __Y;
  return __Y;
}

/* Integer flavour of the same idiom.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_epi32 (void)
{
  __m512i __Y = __Y;
  return __Y;
}

#define _mm512_undefined_si512 _mm512_undefined_epi32
/* Broadcast the char __A to all 64 byte elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
  return __extension__ (__m512i)(__v64qi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}

/* Broadcast the short __A to all 32 16-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
  return __extension__ (__m512i)(__v32hi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}
/* Broadcast the double __A to all 8 elements, via the unmasked
   vbroadcastsd builtin (mask all-ones, pass-through undefined).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
						  (__v2df) { __A, },
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

/* Broadcast the float __A to all 16 elements, via the unmasked
   vbroadcastss builtin.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
  return (__m512) __builtin_ia32_broadcastss512 (__extension__
						 (__v4sf) { __A, },
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1);
}
/* Create the vector [A B C D A B C D A B C D A B C D].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Create the vector [A B C D A B C D] of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
		   long long __D)
{
  return __extension__ (__m512i) (__v8di)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Create the vector [A B C D A B C D] of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Create the vector [A B C D A B C D A B C D A B C D] of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}
/* Reversed-argument variants of the set4 constructors.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3)					      \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3)					      \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3)					      \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3)					      \
  _mm512_set4_ps(e3,e2,e1,e0)
/* Return a 512-bit vector of all zero bits, as 16 floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Generic alias for the all-zeros vector.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero (void)
{
  return _mm512_setzero_ps ();
}

/* Return a 512-bit vector of all zero bits, as 8 doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Return a 512-bit integer vector of all zero bits.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* Identical to _mm512_setzero_epi32; both spellings are part of the
   Intel API.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
/* Masked move (blend): element i of the result is __A[i] where bit i
   of __U is set, otherwise __W[i].  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U);
}

/* Zero-masked move: element i is __A[i] where bit i of __U is set,
   otherwise zero.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}

/* Single-precision masked move (blend with __W).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U);
}

/* Single-precision zero-masked move.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U);
}
/* Load 8 doubles from __P; the plain-dereference implementation means
   __P must be suitably (64-byte) aligned for __m512d.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
  return *(__m512d *) __P;
}

/* Masked aligned load: elements whose mask bit is clear come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df) __W,
						   (__mmask8) __U);
}

/* Zero-masked aligned load: unselected elements are zero.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

/* Store 8 doubles to the (64-byte-aligned) address __P.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
  *(__m512d *) __P = __A;
}

/* Masked aligned store: only elements whose mask bit is set are
   written; the rest of the memory is untouched.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
				   (__mmask8) __U);
}

/* Load 16 floats from the (64-byte-aligned) address __P.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
  return *(__m512 *) __P;
}

/* Masked aligned float load; unselected elements come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

/* Zero-masked aligned float load.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

/* Store 16 floats to the (64-byte-aligned) address __P.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
  *(__m512 *) __P = __A;
}

/* Masked aligned float store.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
				   (__mmask16) __U);
}
/* Masked 64-bit integer move (blend with __W).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
						     (__v8di) __W,
						     (__mmask8) __U);
}

/* Zero-masked 64-bit integer move.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

/* Load 8 64-bit integers from the (64-byte-aligned) address __P.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Masked aligned 64-bit load; unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
							(__v8di) __W,
							(__mmask8) __U);
}

/* Zero-masked aligned 64-bit load.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
							(__v8di)
							_mm512_setzero_si512 (),
							(__mmask8) __U);
}

/* Store 8 64-bit integers to the (64-byte-aligned) address __P.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Masked aligned 64-bit store; only selected elements are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
					(__mmask8) __U);
}
/* Masked 32-bit integer move (blend with __W).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
						     (__v16si) __W,
						     (__mmask16) __U);
}

/* Zero-masked 32-bit integer move.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

/* Load a whole 512-bit integer vector from the (64-byte-aligned)
   address __P.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Same load, under the epi32-flavoured Intel name.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Masked aligned 32-bit load; unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
							(__v16si) __W,
							(__mmask16) __U);
}

/* Zero-masked aligned 32-bit load.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
							(__v16si)
							_mm512_setzero_si512 (),
							(__mmask16) __U);
}

/* Store a whole 512-bit integer vector to the (64-byte-aligned)
   address __P.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Same store, under the epi32-flavoured Intel name.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Masked aligned 32-bit store; only selected elements are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
					(__mmask16) __U);
}
598 extern __inline __m512i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
602 return (__m512i) ((__v16su) __A * (__v16su) __B);
/* Zero-masked low 32-bit multiply: unselected elements are zero.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Masked low 32-bit multiply: unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}
/* Full 64-bit element-wise product (GCC extension name "mullox");
   unsigned view avoids signed-overflow UB, low 64 bits are the same.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullox_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

/* Masked variant, implemented as multiply-then-blend.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
}
/* Per-element variable left shift of 32-bit elements of __X by the
   counts in __Y (unmasked: mask all-ones, pass-through undefined).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked variable left shift; unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked variable left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Per-element variable arithmetic right shift of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked variable arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked variable arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Per-element variable logical right shift of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked variable logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked variable logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
/* Element-wise 64-bit add; unsigned view so overflow wraps without
   undefined behavior (bit pattern is the same as signed add).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Masked 64-bit add; unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masked 64-bit add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* Element-wise 64-bit subtract (unsigned view, wraps).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Masked 64-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masked 64-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
791 extern __inline __m512i
792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
793 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
795 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
796 (__v8di) __Y,
797 (__v8di)
798 _mm512_undefined_pd (),
799 (__mmask8) -1);
/* Masked variable 64-bit left shift; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masked variable 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
/* Per-element variable arithmetic right shift of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Masked variable 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masked variable 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* Per-element variable logical right shift of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Masked variable 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masked variable 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
887 extern __inline __m512i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm512_add_epi32 (__m512i __A, __m512i __B)
891 return (__m512i) ((__v16su) __A + (__v16su) __B);
/* Masked 32-bit add; unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Zero-masked 32-bit add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
/* Signed widening multiply (vpmuldq): multiplies the even-indexed
   32-bit elements of __X and __Y, producing 8 64-bit products.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked signed widening multiply; note the mask is 8 bits because
   the results are 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di) __W, __M);
}

/* Zero-masked signed widening multiply.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}
/* Element-wise 32-bit subtract (unsigned view, wraps).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Masked 32-bit subtract; unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Zero-masked 32-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Unsigned widening multiply (vpmuludq): multiplies the even-indexed
   32-bit elements, producing 8 64-bit products.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di)
						   _mm512_undefined_epi32 (),
						   (__mmask8) -1);
}

/* Masked unsigned widening multiply (8-bit mask: 64-bit results).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di) __W, __M);
}

/* Zero-masked unsigned widening multiply.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   __M);
}
#ifdef __OPTIMIZE__
/* Immediate-count left shift of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked immediate-count left shift; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masked immediate-count left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* Without optimization, inline wrappers would not propagate the
   constant shift count into the builtin, so macros that pass the
   argument straight through are used instead.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1052 extern __inline __m512i
1053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054 _mm512_sll_epi64 (__m512i __A, __m128i __B)
1056 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1057 (__v2di) __B,
1058 (__v8di)
1059 _mm512_undefined_epi32 (),
1060 (__mmask8) -1);
1063 extern __inline __m512i
1064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1067 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1068 (__v2di) __B,
1069 (__v8di) __W,
1070 (__mmask8) __U);
1073 extern __inline __m512i
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1077 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1078 (__v2di) __B,
1079 (__v8di)
1080 _mm512_setzero_si512 (),
1081 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Shift each 64-bit element of __A right logically by the immediate
   count __B, shifting in zeros.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* Macro forms for the non-__OPTIMIZE__ case so the count reaches the
   builtin as a literal immediate.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1131 extern __inline __m512i
1132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1135 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1136 (__v2di) __B,
1137 (__v8di)
1138 _mm512_undefined_epi32 (),
1139 (__mmask8) -1);
1142 extern __inline __m512i
1143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1146 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1147 (__v2di) __B,
1148 (__v8di) __W,
1149 (__mmask8) __U);
1152 extern __inline __m512i
1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1156 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1157 (__v2di) __B,
1158 (__v8di)
1159 _mm512_setzero_si512 (),
1160 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Shift each 64-bit element of __A right arithmetically (sign bit
   replicated) by the immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* Macro forms for the non-__OPTIMIZE__ case so the count reaches the
   builtin as a literal immediate.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1210 extern __inline __m512i
1211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1212 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1214 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1215 (__v2di) __B,
1216 (__v8di)
1217 _mm512_undefined_epi32 (),
1218 (__mmask8) -1);
1221 extern __inline __m512i
1222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1223 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1225 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1226 (__v2di) __B,
1227 (__v8di) __W,
1228 (__mmask8) __U);
1231 extern __inline __m512i
1232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1233 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1235 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1236 (__v2di) __B,
1237 (__v8di)
1238 _mm512_setzero_si512 (),
1239 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Shift each 32-bit element of __A left by the immediate count __B,
   shifting in zeros.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro forms for the non-__OPTIMIZE__ case so the count reaches the
   builtin as a literal immediate.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1289 extern __inline __m512i
1290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1293 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1294 (__v4si) __B,
1295 (__v16si)
1296 _mm512_undefined_epi32 (),
1297 (__mmask16) -1);
1300 extern __inline __m512i
1301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1304 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1305 (__v4si) __B,
1306 (__v16si) __W,
1307 (__mmask16) __U);
1310 extern __inline __m512i
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1314 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1315 (__v4si) __B,
1316 (__v16si)
1317 _mm512_setzero_si512 (),
1318 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Shift each 32-bit element of __A right logically by the immediate
   count __B, shifting in zeros.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro forms for the non-__OPTIMIZE__ case so the count reaches the
   builtin as a literal immediate.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1368 extern __inline __m512i
1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1372 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1373 (__v4si) __B,
1374 (__v16si)
1375 _mm512_undefined_epi32 (),
1376 (__mmask16) -1);
1379 extern __inline __m512i
1380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1383 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1384 (__v4si) __B,
1385 (__v16si) __W,
1386 (__mmask16) __U);
1389 extern __inline __m512i
1390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1393 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1394 (__v4si) __B,
1395 (__v16si)
1396 _mm512_setzero_si512 (),
1397 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Shift each 32-bit element of __A right arithmetically (sign bit
   replicated) by the immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro forms for the non-__OPTIMIZE__ case so the count reaches the
   builtin as a literal immediate.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1447 extern __inline __m512i
1448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1451 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1452 (__v4si) __B,
1453 (__v16si)
1454 _mm512_undefined_epi32 (),
1455 (__mmask16) -1);
1458 extern __inline __m512i
1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1462 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1463 (__v4si) __B,
1464 (__v16si) __W,
1465 (__mmask16) __U);
1468 extern __inline __m512i
1469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1472 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1473 (__v4si) __B,
1474 (__v16si)
1475 _mm512_setzero_si512 (),
1476 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Add the low doubles of __A and __B using rounding mode __R; the
   upper element of the result is copied from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

/* As above; the low result element comes from __W when bit 0 of __U
   is clear.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

/* As above; the low result element is zeroed when bit 0 of __U is
   clear.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

/* Single-precision counterparts: add the low floats of __A and __B
   with rounding mode __R; upper elements copied from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

/* Merge-masked form of _mm_add_round_ss.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

/* Zero-masked form of _mm_add_round_ss.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

/* Scalar subtraction (__A - __B in the low element) with rounding
   mode __R; upper elements copied from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

/* Merge-masked form of _mm_sub_round_sd.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

/* Zero-masked form of _mm_sub_round_sd.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

/* Single-precision scalar subtraction with rounding mode __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

/* Merge-masked form of _mm_sub_round_ss.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

/* Zero-masked form of _mm_sub_round_ss.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}
#else
/* Without __OPTIMIZE__ use macros so the rounding-mode operand stays
   a compile-time immediate.  */
#define _mm_add_round_sd(A, B, C) \
    (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_mask_add_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_sd(U, A, B, C) \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_add_round_ss(A, B, C) \
    (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_mask_add_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_ss(U, A, B, C) \
    (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_sub_round_sd(A, B, C) \
    (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_sd(U, A, B, C) \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_sub_round_ss(A, B, C) \
    (__m128)__builtin_ia32_subss_round(A, B, C)

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_ss(U, A, B, C) \
    (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
#ifdef __OPTIMIZE__
/* Bitwise ternary logic on 64-bit lanes of __A, __B and __C; each
   result bit is taken from the 8-bit truth table __imm, indexed by
   the corresponding bits of the three inputs.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) -1);
}

/* As above; lanes whose bit in __U is clear keep their value from
   __A (which is also the first logic operand).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) __U);
}

/* As above; lanes whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
						      (__v8di) __B,
						      (__v8di) __C,
						      __imm, (__mmask8) __U);
}

/* 32-bit-lane counterpart of _mm512_ternarylogic_epi64.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) -1);
}

/* Merge-masked form; unselected lanes keep their value from __A.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) __U);
}

/* Zero-masked form; unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
						      (__v16si) __B,
						      (__v16si) __C,
						      __imm, (__mmask16) __U);
}
#else
/* Macro forms for the non-__OPTIMIZE__ case so the truth-table
   operand stays a literal immediate.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#endif
1737 extern __inline __m512d
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm512_rcp14_pd (__m512d __A)
1741 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1742 (__v8df)
1743 _mm512_undefined_pd (),
1744 (__mmask8) -1);
1747 extern __inline __m512d
1748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1749 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1751 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1752 (__v8df) __W,
1753 (__mmask8) __U);
1756 extern __inline __m512d
1757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1758 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1760 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1761 (__v8df)
1762 _mm512_setzero_pd (),
1763 (__mmask8) __U);
1766 extern __inline __m512
1767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1768 _mm512_rcp14_ps (__m512 __A)
1770 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1771 (__v16sf)
1772 _mm512_undefined_ps (),
1773 (__mmask16) -1);
1776 extern __inline __m512
1777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1780 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1781 (__v16sf) __W,
1782 (__mmask16) __U);
1785 extern __inline __m512
1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1789 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1790 (__v16sf)
1791 _mm512_setzero_ps (),
1792 (__mmask16) __U);
1795 extern __inline __m128d
1796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1797 _mm_rcp14_sd (__m128d __A, __m128d __B)
1799 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1800 (__v2df) __A);
1803 extern __inline __m128d
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1807 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1808 (__v2df) __A,
1809 (__v2df) __W,
1810 (__mmask8) __U);
1813 extern __inline __m128d
1814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1815 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1817 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1818 (__v2df) __A,
1819 (__v2df) _mm_setzero_ps (),
1820 (__mmask8) __U);
1823 extern __inline __m128
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm_rcp14_ss (__m128 __A, __m128 __B)
1827 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1828 (__v4sf) __A);
1831 extern __inline __m128
1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1833 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1835 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1836 (__v4sf) __A,
1837 (__v4sf) __W,
1838 (__mmask8) __U);
1841 extern __inline __m128
1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1843 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1845 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1846 (__v4sf) __A,
1847 (__v4sf) _mm_setzero_ps (),
1848 (__mmask8) __U);
1851 extern __inline __m512d
1852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853 _mm512_rsqrt14_pd (__m512d __A)
1855 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1856 (__v8df)
1857 _mm512_undefined_pd (),
1858 (__mmask8) -1);
1861 extern __inline __m512d
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1865 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1866 (__v8df) __W,
1867 (__mmask8) __U);
1870 extern __inline __m512d
1871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1872 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1874 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1875 (__v8df)
1876 _mm512_setzero_pd (),
1877 (__mmask8) __U);
1880 extern __inline __m512
1881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1882 _mm512_rsqrt14_ps (__m512 __A)
1884 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1885 (__v16sf)
1886 _mm512_undefined_ps (),
1887 (__mmask16) -1);
1890 extern __inline __m512
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1894 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1895 (__v16sf) __W,
1896 (__mmask16) __U);
1899 extern __inline __m512
1900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1901 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1903 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1904 (__v16sf)
1905 _mm512_setzero_ps (),
1906 (__mmask16) __U);
1909 extern __inline __m128d
1910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1911 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1913 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1914 (__v2df) __A);
1917 extern __inline __m128d
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1921 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1922 (__v2df) __A,
1923 (__v2df) __W,
1924 (__mmask8) __U);
1927 extern __inline __m128d
1928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1929 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1931 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1932 (__v2df) __A,
1933 (__v2df) _mm_setzero_pd (),
1934 (__mmask8) __U);
1937 extern __inline __m128
1938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1939 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1941 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1942 (__v4sf) __A);
1945 extern __inline __m128
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1949 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1950 (__v4sf) __A,
1951 (__v4sf) __W,
1952 (__mmask8) __U);
1955 extern __inline __m128
1956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1957 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1959 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1960 (__v4sf) __A,
1961 (__v4sf) _mm_setzero_ps (),
1962 (__mmask8) __U);
1965 #ifdef __OPTIMIZE__
1966 extern __inline __m512d
1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1970 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1971 (__v8df)
1972 _mm512_undefined_pd (),
1973 (__mmask8) -1, __R);
1976 extern __inline __m512d
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1979 const int __R)
1981 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1982 (__v8df) __W,
1983 (__mmask8) __U, __R);
1986 extern __inline __m512d
1987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1988 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1990 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1991 (__v8df)
1992 _mm512_setzero_pd (),
1993 (__mmask8) __U, __R);
1996 extern __inline __m512
1997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1998 _mm512_sqrt_round_ps (__m512 __A, const int __R)
2000 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2001 (__v16sf)
2002 _mm512_undefined_ps (),
2003 (__mmask16) -1, __R);
2006 extern __inline __m512
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
2010 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2011 (__v16sf) __W,
2012 (__mmask16) __U, __R);
2015 extern __inline __m512
2016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2017 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
2019 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
2020 (__v16sf)
2021 _mm512_setzero_ps (),
2022 (__mmask16) __U, __R);
2025 extern __inline __m128d
2026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2027 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
2029 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2030 (__v2df) __A,
2031 (__v2df)
2032 _mm_setzero_pd (),
2033 (__mmask8) -1, __R);
2036 extern __inline __m128d
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2039 const int __R)
2041 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2042 (__v2df) __A,
2043 (__v2df) __W,
2044 (__mmask8) __U, __R);
2047 extern __inline __m128d
2048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2049 _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
2051 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
2052 (__v2df) __A,
2053 (__v2df)
2054 _mm_setzero_pd (),
2055 (__mmask8) __U, __R);
2058 extern __inline __m128
2059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2060 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
2062 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2063 (__v4sf) __A,
2064 (__v4sf)
2065 _mm_setzero_ps (),
2066 (__mmask8) -1, __R);
2069 extern __inline __m128
2070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2071 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2072 const int __R)
2074 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2075 (__v4sf) __A,
2076 (__v4sf) __W,
2077 (__mmask8) __U, __R);
2080 extern __inline __m128
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2084 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2085 (__v4sf) __A,
2086 (__v4sf)
2087 _mm_setzero_ps (),
2088 (__mmask8) __U, __R);
#else
/* Without optimization the rounding-mode operand cannot be proven to be
   an immediate inside an inline function, so the sqrt-with-rounding
   intrinsics are provided as macros that forward straight to the
   builtins.  Same merge/zero-mask conventions as the inline forms.  */
#define _mm512_sqrt_round_pd(A, C) \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sqrt_round_ps(A, C) \
    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
    (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

/* Scalar forms: note the builtin takes B (source of the low element)
   before A (source of the upper element).  */
#define _mm_sqrt_round_sd(A, B, C) \
    (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
	(__v2df) _mm_setzero_pd (), -1, C)

#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)

#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
    (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
	(__v2df) _mm_setzero_pd (), U, C)

#define _mm_sqrt_round_ss(A, B, C) \
    (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
	(__v4sf) _mm_setzero_ps (), -1, C)

#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)

#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
    (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
	(__v4sf) _mm_setzero_ps (), U, C)
#endif
2132 extern __inline __m512i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm512_cvtepi8_epi32 (__m128i __A)
2136 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2137 (__v16si)
2138 _mm512_undefined_epi32 (),
2139 (__mmask16) -1);
2142 extern __inline __m512i
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2146 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2147 (__v16si) __W,
2148 (__mmask16) __U);
2151 extern __inline __m512i
2152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2153 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2155 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2156 (__v16si)
2157 _mm512_setzero_si512 (),
2158 (__mmask16) __U);
2161 extern __inline __m512i
2162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163 _mm512_cvtepi8_epi64 (__m128i __A)
2165 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2166 (__v8di)
2167 _mm512_undefined_epi32 (),
2168 (__mmask8) -1);
2171 extern __inline __m512i
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2175 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2176 (__v8di) __W,
2177 (__mmask8) __U);
2180 extern __inline __m512i
2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2184 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2185 (__v8di)
2186 _mm512_setzero_si512 (),
2187 (__mmask8) __U);
2190 extern __inline __m512i
2191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192 _mm512_cvtepi16_epi32 (__m256i __A)
2194 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2195 (__v16si)
2196 _mm512_undefined_epi32 (),
2197 (__mmask16) -1);
2200 extern __inline __m512i
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2204 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2205 (__v16si) __W,
2206 (__mmask16) __U);
2209 extern __inline __m512i
2210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2211 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2213 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2214 (__v16si)
2215 _mm512_setzero_si512 (),
2216 (__mmask16) __U);
2219 extern __inline __m512i
2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221 _mm512_cvtepi16_epi64 (__m128i __A)
2223 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2224 (__v8di)
2225 _mm512_undefined_epi32 (),
2226 (__mmask8) -1);
2229 extern __inline __m512i
2230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2233 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2234 (__v8di) __W,
2235 (__mmask8) __U);
2238 extern __inline __m512i
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2242 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2243 (__v8di)
2244 _mm512_setzero_si512 (),
2245 (__mmask8) __U);
2248 extern __inline __m512i
2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250 _mm512_cvtepi32_epi64 (__m256i __X)
2252 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2253 (__v8di)
2254 _mm512_undefined_epi32 (),
2255 (__mmask8) -1);
2258 extern __inline __m512i
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2262 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2263 (__v8di) __W,
2264 (__mmask8) __U);
2267 extern __inline __m512i
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2271 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2272 (__v8di)
2273 _mm512_setzero_si512 (),
2274 (__mmask8) __U);
2277 extern __inline __m512i
2278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279 _mm512_cvtepu8_epi32 (__m128i __A)
2281 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2282 (__v16si)
2283 _mm512_undefined_epi32 (),
2284 (__mmask16) -1);
2287 extern __inline __m512i
2288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2289 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2291 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2292 (__v16si) __W,
2293 (__mmask16) __U);
2296 extern __inline __m512i
2297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2298 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2300 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2301 (__v16si)
2302 _mm512_setzero_si512 (),
2303 (__mmask16) __U);
2306 extern __inline __m512i
2307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2308 _mm512_cvtepu8_epi64 (__m128i __A)
2310 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2311 (__v8di)
2312 _mm512_undefined_epi32 (),
2313 (__mmask8) -1);
2316 extern __inline __m512i
2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2320 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2321 (__v8di) __W,
2322 (__mmask8) __U);
2325 extern __inline __m512i
2326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2327 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2329 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2330 (__v8di)
2331 _mm512_setzero_si512 (),
2332 (__mmask8) __U);
2335 extern __inline __m512i
2336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337 _mm512_cvtepu16_epi32 (__m256i __A)
2339 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2340 (__v16si)
2341 _mm512_undefined_epi32 (),
2342 (__mmask16) -1);
2345 extern __inline __m512i
2346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2349 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2350 (__v16si) __W,
2351 (__mmask16) __U);
2354 extern __inline __m512i
2355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2356 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2358 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2359 (__v16si)
2360 _mm512_setzero_si512 (),
2361 (__mmask16) __U);
2364 extern __inline __m512i
2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366 _mm512_cvtepu16_epi64 (__m128i __A)
2368 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2369 (__v8di)
2370 _mm512_undefined_epi32 (),
2371 (__mmask8) -1);
2374 extern __inline __m512i
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2378 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2379 (__v8di) __W,
2380 (__mmask8) __U);
2383 extern __inline __m512i
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2387 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2388 (__v8di)
2389 _mm512_setzero_si512 (),
2390 (__mmask8) __U);
2393 extern __inline __m512i
2394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2395 _mm512_cvtepu32_epi64 (__m256i __X)
2397 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2398 (__v8di)
2399 _mm512_undefined_epi32 (),
2400 (__mmask8) -1);
2403 extern __inline __m512i
2404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2407 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2408 (__v8di) __W,
2409 (__mmask8) __U);
2412 extern __inline __m512i
2413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2416 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2417 (__v8di)
2418 _mm512_setzero_si512 (),
2419 (__mmask8) __U);
2422 #ifdef __OPTIMIZE__
2423 extern __inline __m512d
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2427 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2428 (__v8df) __B,
2429 (__v8df)
2430 _mm512_undefined_pd (),
2431 (__mmask8) -1, __R);
2434 extern __inline __m512d
2435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2436 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2437 __m512d __B, const int __R)
2439 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2440 (__v8df) __B,
2441 (__v8df) __W,
2442 (__mmask8) __U, __R);
2445 extern __inline __m512d
2446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2447 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2448 const int __R)
2450 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2451 (__v8df) __B,
2452 (__v8df)
2453 _mm512_setzero_pd (),
2454 (__mmask8) __U, __R);
2457 extern __inline __m512
2458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2459 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2461 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2462 (__v16sf) __B,
2463 (__v16sf)
2464 _mm512_undefined_ps (),
2465 (__mmask16) -1, __R);
2468 extern __inline __m512
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2471 __m512 __B, const int __R)
2473 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2474 (__v16sf) __B,
2475 (__v16sf) __W,
2476 (__mmask16) __U, __R);
2479 extern __inline __m512
2480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2481 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2483 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2484 (__v16sf) __B,
2485 (__v16sf)
2486 _mm512_setzero_ps (),
2487 (__mmask16) __U, __R);
2490 extern __inline __m512d
2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2494 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2495 (__v8df) __B,
2496 (__v8df)
2497 _mm512_undefined_pd (),
2498 (__mmask8) -1, __R);
2501 extern __inline __m512d
2502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2503 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2504 __m512d __B, const int __R)
2506 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2507 (__v8df) __B,
2508 (__v8df) __W,
2509 (__mmask8) __U, __R);
2512 extern __inline __m512d
2513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2515 const int __R)
2517 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2518 (__v8df) __B,
2519 (__v8df)
2520 _mm512_setzero_pd (),
2521 (__mmask8) __U, __R);
2524 extern __inline __m512
2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2528 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2529 (__v16sf) __B,
2530 (__v16sf)
2531 _mm512_undefined_ps (),
2532 (__mmask16) -1, __R);
2535 extern __inline __m512
2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2538 __m512 __B, const int __R)
2540 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2541 (__v16sf) __B,
2542 (__v16sf) __W,
2543 (__mmask16) __U, __R);
2546 extern __inline __m512
2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2550 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2551 (__v16sf) __B,
2552 (__v16sf)
2553 _mm512_setzero_ps (),
2554 (__mmask16) __U, __R);
#else
/* Non-optimizing fallback: macro forms so the rounding-mode operand
   reaches the builtin as a literal immediate.  */
#define _mm512_add_round_pd(A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_pd(U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_add_round_ps(A, B, C) \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_ps(U, A, B, C) \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_sub_round_pd(A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sub_round_ps(A, B, C) \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
2594 #ifdef __OPTIMIZE__
2595 extern __inline __m512d
2596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2599 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2600 (__v8df) __B,
2601 (__v8df)
2602 _mm512_undefined_pd (),
2603 (__mmask8) -1, __R);
2606 extern __inline __m512d
2607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2609 __m512d __B, const int __R)
2611 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2612 (__v8df) __B,
2613 (__v8df) __W,
2614 (__mmask8) __U, __R);
2617 extern __inline __m512d
2618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2620 const int __R)
2622 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2623 (__v8df) __B,
2624 (__v8df)
2625 _mm512_setzero_pd (),
2626 (__mmask8) __U, __R);
2629 extern __inline __m512
2630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2631 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2633 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2634 (__v16sf) __B,
2635 (__v16sf)
2636 _mm512_undefined_ps (),
2637 (__mmask16) -1, __R);
2640 extern __inline __m512
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2643 __m512 __B, const int __R)
2645 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2646 (__v16sf) __B,
2647 (__v16sf) __W,
2648 (__mmask16) __U, __R);
2651 extern __inline __m512
2652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2655 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2656 (__v16sf) __B,
2657 (__v16sf)
2658 _mm512_setzero_ps (),
2659 (__mmask16) __U, __R);
2662 extern __inline __m512d
2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2666 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2667 (__v8df) __V,
2668 (__v8df)
2669 _mm512_undefined_pd (),
2670 (__mmask8) -1, __R);
2673 extern __inline __m512d
2674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2675 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2676 __m512d __V, const int __R)
2678 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2679 (__v8df) __V,
2680 (__v8df) __W,
2681 (__mmask8) __U, __R);
2684 extern __inline __m512d
2685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2686 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2687 const int __R)
2689 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2690 (__v8df) __V,
2691 (__v8df)
2692 _mm512_setzero_pd (),
2693 (__mmask8) __U, __R);
2696 extern __inline __m512
2697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2698 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2700 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2701 (__v16sf) __B,
2702 (__v16sf)
2703 _mm512_undefined_ps (),
2704 (__mmask16) -1, __R);
2707 extern __inline __m512
2708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2709 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2710 __m512 __B, const int __R)
2712 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2713 (__v16sf) __B,
2714 (__v16sf) __W,
2715 (__mmask16) __U, __R);
2718 extern __inline __m512
2719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2720 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2722 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2723 (__v16sf) __B,
2724 (__v16sf)
2725 _mm512_setzero_ps (),
2726 (__mmask16) __U, __R);
2729 extern __inline __m128d
2730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2731 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2733 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2734 (__v2df) __B,
2735 __R);
2738 extern __inline __m128d
2739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2740 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2741 __m128d __B, const int __R)
2743 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2744 (__v2df) __B,
2745 (__v2df) __W,
2746 (__mmask8) __U, __R);
2749 extern __inline __m128d
2750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2751 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2752 const int __R)
2754 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2755 (__v2df) __B,
2756 (__v2df)
2757 _mm_setzero_pd (),
2758 (__mmask8) __U, __R);
2761 extern __inline __m128
2762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2763 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2765 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2766 (__v4sf) __B,
2767 __R);
2770 extern __inline __m128
2771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2772 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2773 __m128 __B, const int __R)
2775 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2776 (__v4sf) __B,
2777 (__v4sf) __W,
2778 (__mmask8) __U, __R);
2781 extern __inline __m128
2782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2783 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2784 const int __R)
2786 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2787 (__v4sf) __B,
2788 (__v4sf)
2789 _mm_setzero_ps (),
2790 (__mmask8) __U, __R);
2793 extern __inline __m128d
2794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2795 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2797 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2798 (__v2df) __B,
2799 __R);
2802 extern __inline __m128d
2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2804 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2805 __m128d __B, const int __R)
2807 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2808 (__v2df) __B,
2809 (__v2df) __W,
2810 (__mmask8) __U, __R);
2813 extern __inline __m128d
2814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2815 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2816 const int __R)
2818 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2819 (__v2df) __B,
2820 (__v2df)
2821 _mm_setzero_pd (),
2822 (__mmask8) __U, __R);
2825 extern __inline __m128
2826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2827 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2829 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2830 (__v4sf) __B,
2831 __R);
2834 extern __inline __m128
2835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2836 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2837 __m128 __B, const int __R)
2839 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2840 (__v4sf) __B,
2841 (__v4sf) __W,
2842 (__mmask8) __U, __R);
2845 extern __inline __m128
2846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2847 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2848 const int __R)
2850 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2851 (__v4sf) __B,
2852 (__v4sf)
2853 _mm_setzero_ps (),
2854 (__mmask8) __U, __R);
#else
/* Non-optimizing fallback: macro forms so the rounding-mode operand
   reaches the builtin as a literal immediate.  */
#define _mm512_mul_round_pd(A, B, C) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_mul_round_ps(A, B, C) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_div_round_pd(A, B, C) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_pd(U, A, B, C) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_div_round_ps(A, B, C) \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_ps(U, A, B, C) \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

/* Scalar forms; the unmasked variants use the plain (non-_mask) round
   builtins.  */
#define _mm_mul_round_sd(A, B, C) \
    (__m128d)__builtin_ia32_mulsd_round(A, B, C)

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)

#define _mm_maskz_mul_round_sd(U, A, B, C) \
    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_mul_round_ss(A, B, C) \
    (__m128)__builtin_ia32_mulss_round(A, B, C)

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)

#define _mm_maskz_mul_round_ss(U, A, B, C) \
    (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_div_round_sd(A, B, C) \
    (__m128d)__builtin_ia32_divsd_round(A, B, C)

#define _mm_mask_div_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)

#define _mm_maskz_div_round_sd(U, A, B, C) \
    (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_div_round_ss(A, B, C) \
    (__m128)__builtin_ia32_divss_round(A, B, C)

#define _mm_mask_div_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)

#define _mm_maskz_div_round_ss(U, A, B, C) \
    (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
2932 #ifdef __OPTIMIZE__
2933 extern __inline __m512d
2934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2935 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2937 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2938 (__v8df) __B,
2939 (__v8df)
2940 _mm512_undefined_pd (),
2941 (__mmask8) -1, __R);
2944 extern __inline __m512d
2945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2946 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2947 __m512d __B, const int __R)
2949 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2950 (__v8df) __B,
2951 (__v8df) __W,
2952 (__mmask8) __U, __R);
2955 extern __inline __m512d
2956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2958 const int __R)
2960 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2961 (__v8df) __B,
2962 (__v8df)
2963 _mm512_setzero_pd (),
2964 (__mmask8) __U, __R);
2967 extern __inline __m512
2968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2969 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2971 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2972 (__v16sf) __B,
2973 (__v16sf)
2974 _mm512_undefined_ps (),
2975 (__mmask16) -1, __R);
2978 extern __inline __m512
2979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2980 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2981 __m512 __B, const int __R)
2983 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2984 (__v16sf) __B,
2985 (__v16sf) __W,
2986 (__mmask16) __U, __R);
2989 extern __inline __m512
2990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2991 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2993 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2994 (__v16sf) __B,
2995 (__v16sf)
2996 _mm512_setzero_ps (),
2997 (__mmask16) __U, __R);
3000 extern __inline __m512d
3001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3002 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
3004 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3005 (__v8df) __B,
3006 (__v8df)
3007 _mm512_undefined_pd (),
3008 (__mmask8) -1, __R);
3011 extern __inline __m512d
3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3014 __m512d __B, const int __R)
3016 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3017 (__v8df) __B,
3018 (__v8df) __W,
3019 (__mmask8) __U, __R);
3022 extern __inline __m512d
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3025 const int __R)
3027 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
3028 (__v8df) __B,
3029 (__v8df)
3030 _mm512_setzero_pd (),
3031 (__mmask8) __U, __R);
3034 extern __inline __m512
3035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3036 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
3038 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3039 (__v16sf) __B,
3040 (__v16sf)
3041 _mm512_undefined_ps (),
3042 (__mmask16) -1, __R);
3045 extern __inline __m512
3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3048 __m512 __B, const int __R)
3050 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3051 (__v16sf) __B,
3052 (__v16sf) __W,
3053 (__mmask16) __U, __R);
3056 extern __inline __m512
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
3060 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
3061 (__v16sf) __B,
3062 (__v16sf)
3063 _mm512_setzero_ps (),
3064 (__mmask16) __U, __R);
3066 #else
3067 #define _mm512_max_round_pd(A, B, R) \
3068 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
3070 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
3071 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
3073 #define _mm512_maskz_max_round_pd(U, A, B, R) \
3074 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
3076 #define _mm512_max_round_ps(A, B, R) \
3077 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_pd(), -1, R)
3079 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
3080 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
3082 #define _mm512_maskz_max_round_ps(U, A, B, R) \
3083 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
3085 #define _mm512_min_round_pd(A, B, R) \
3086 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
3088 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
3089 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
3091 #define _mm512_maskz_min_round_pd(U, A, B, R) \
3092 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
3094 #define _mm512_min_round_ps(A, B, R) \
3095 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
3097 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
3098 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
3100 #define _mm512_maskz_min_round_ps(U, A, B, R) \
3101 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
3102 #endif
3104 #ifdef __OPTIMIZE__
3105 extern __inline __m512d
3106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3107 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3109 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3110 (__v8df) __B,
3111 (__v8df)
3112 _mm512_undefined_pd (),
3113 (__mmask8) -1, __R);
3116 extern __inline __m512d
3117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3118 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3119 __m512d __B, const int __R)
3121 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3122 (__v8df) __B,
3123 (__v8df) __W,
3124 (__mmask8) __U, __R);
3127 extern __inline __m512d
3128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3129 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3130 const int __R)
3132 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3133 (__v8df) __B,
3134 (__v8df)
3135 _mm512_setzero_pd (),
3136 (__mmask8) __U, __R);
3139 extern __inline __m512
3140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3143 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3144 (__v16sf) __B,
3145 (__v16sf)
3146 _mm512_undefined_ps (),
3147 (__mmask16) -1, __R);
3150 extern __inline __m512
3151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3152 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3153 __m512 __B, const int __R)
3155 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3156 (__v16sf) __B,
3157 (__v16sf) __W,
3158 (__mmask16) __U, __R);
3161 extern __inline __m512
3162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3163 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3164 const int __R)
3166 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3167 (__v16sf) __B,
3168 (__v16sf)
3169 _mm512_setzero_ps (),
3170 (__mmask16) __U, __R);
3173 extern __inline __m128d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3177 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3178 (__v2df) __B,
3179 (__v2df)
3180 _mm_setzero_pd (),
3181 (__mmask8) -1, __R);
3184 extern __inline __m128d
3185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3187 const int __R)
3189 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3190 (__v2df) __B,
3191 (__v2df) __W,
3192 (__mmask8) __U, __R);
3195 extern __inline __m128d
3196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3198 const int __R)
3200 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3201 (__v2df) __B,
3202 (__v2df)
3203 _mm_setzero_pd (),
3204 (__mmask8) __U, __R);
3207 extern __inline __m128
3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3211 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3212 (__v4sf) __B,
3213 (__v4sf)
3214 _mm_setzero_ps (),
3215 (__mmask8) -1, __R);
3218 extern __inline __m128
3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3221 const int __R)
3223 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3224 (__v4sf) __B,
3225 (__v4sf) __W,
3226 (__mmask8) __U, __R);
3229 extern __inline __m128
3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3233 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3234 (__v4sf) __B,
3235 (__v4sf)
3236 _mm_setzero_ps (),
3237 (__mmask8) __U, __R);
3239 #else
3240 #define _mm512_scalef_round_pd(A, B, C) \
3241 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
3243 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
3244 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
3246 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
3247 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
3249 #define _mm512_scalef_round_ps(A, B, C) \
3250 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
3252 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
3253 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
3255 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
3256 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
3258 #define _mm_scalef_round_sd(A, B, C) \
3259 (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
3260 (__v2df)_mm_setzero_pd (), -1, C)
3262 #define _mm_scalef_round_ss(A, B, C) \
3263 (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
3264 (__v4sf)_mm_setzero_ps (), -1, C)
3265 #endif
3267 #ifdef __OPTIMIZE__
3268 extern __inline __m512d
3269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3270 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3272 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3273 (__v8df) __B,
3274 (__v8df) __C,
3275 (__mmask8) -1, __R);
3278 extern __inline __m512d
3279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3280 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3281 __m512d __C, const int __R)
3283 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3284 (__v8df) __B,
3285 (__v8df) __C,
3286 (__mmask8) __U, __R);
3289 extern __inline __m512d
3290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3291 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3292 __mmask8 __U, const int __R)
3294 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3295 (__v8df) __B,
3296 (__v8df) __C,
3297 (__mmask8) __U, __R);
3300 extern __inline __m512d
3301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3303 __m512d __C, const int __R)
3305 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3306 (__v8df) __B,
3307 (__v8df) __C,
3308 (__mmask8) __U, __R);
3311 extern __inline __m512
3312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3315 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3316 (__v16sf) __B,
3317 (__v16sf) __C,
3318 (__mmask16) -1, __R);
3321 extern __inline __m512
3322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3324 __m512 __C, const int __R)
3326 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3327 (__v16sf) __B,
3328 (__v16sf) __C,
3329 (__mmask16) __U, __R);
3332 extern __inline __m512
3333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3334 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3335 __mmask16 __U, const int __R)
3337 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3338 (__v16sf) __B,
3339 (__v16sf) __C,
3340 (__mmask16) __U, __R);
3343 extern __inline __m512
3344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3345 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3346 __m512 __C, const int __R)
3348 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3349 (__v16sf) __B,
3350 (__v16sf) __C,
3351 (__mmask16) __U, __R);
3354 extern __inline __m512d
3355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3356 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3358 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
3359 (__v8df) __B,
3360 (__v8df) __C,
3361 (__mmask8) -1, __R);
3364 extern __inline __m512d
3365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3367 __m512d __C, const int __R)
3369 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
3370 (__v8df) __B,
3371 (__v8df) __C,
3372 (__mmask8) __U, __R);
3375 extern __inline __m512d
3376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3377 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3378 __mmask8 __U, const int __R)
3380 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3381 (__v8df) __B,
3382 (__v8df) __C,
3383 (__mmask8) __U, __R);
3386 extern __inline __m512d
3387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3388 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3389 __m512d __C, const int __R)
3391 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
3392 (__v8df) __B,
3393 (__v8df) __C,
3394 (__mmask8) __U, __R);
3397 extern __inline __m512
3398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3401 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
3402 (__v16sf) __B,
3403 (__v16sf) __C,
3404 (__mmask16) -1, __R);
3407 extern __inline __m512
3408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3410 __m512 __C, const int __R)
3412 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
3413 (__v16sf) __B,
3414 (__v16sf) __C,
3415 (__mmask16) __U, __R);
3418 extern __inline __m512
3419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3420 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3421 __mmask16 __U, const int __R)
3423 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3424 (__v16sf) __B,
3425 (__v16sf) __C,
3426 (__mmask16) __U, __R);
3429 extern __inline __m512
3430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3431 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3432 __m512 __C, const int __R)
3434 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
3435 (__v16sf) __B,
3436 (__v16sf) __C,
3437 (__mmask16) __U, __R);
3440 extern __inline __m512d
3441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3442 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3444 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3445 (__v8df) __B,
3446 (__v8df) __C,
3447 (__mmask8) -1, __R);
3450 extern __inline __m512d
3451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3452 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3453 __m512d __C, const int __R)
3455 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3456 (__v8df) __B,
3457 (__v8df) __C,
3458 (__mmask8) __U, __R);
3461 extern __inline __m512d
3462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3463 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3464 __mmask8 __U, const int __R)
3466 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3467 (__v8df) __B,
3468 (__v8df) __C,
3469 (__mmask8) __U, __R);
3472 extern __inline __m512d
3473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3475 __m512d __C, const int __R)
3477 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3478 (__v8df) __B,
3479 (__v8df) __C,
3480 (__mmask8) __U, __R);
3483 extern __inline __m512
3484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3485 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3487 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3488 (__v16sf) __B,
3489 (__v16sf) __C,
3490 (__mmask16) -1, __R);
3493 extern __inline __m512
3494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3495 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3496 __m512 __C, const int __R)
3498 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3499 (__v16sf) __B,
3500 (__v16sf) __C,
3501 (__mmask16) __U, __R);
3504 extern __inline __m512
3505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3506 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3507 __mmask16 __U, const int __R)
3509 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3510 (__v16sf) __B,
3511 (__v16sf) __C,
3512 (__mmask16) __U, __R);
3515 extern __inline __m512
3516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3518 __m512 __C, const int __R)
3520 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3521 (__v16sf) __B,
3522 (__v16sf) __C,
3523 (__mmask16) __U, __R);
3526 extern __inline __m512d
3527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3528 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3530 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3531 (__v8df) __B,
3532 -(__v8df) __C,
3533 (__mmask8) -1, __R);
3536 extern __inline __m512d
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3539 __m512d __C, const int __R)
3541 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3542 (__v8df) __B,
3543 -(__v8df) __C,
3544 (__mmask8) __U, __R);
3547 extern __inline __m512d
3548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3549 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3550 __mmask8 __U, const int __R)
3552 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3553 (__v8df) __B,
3554 (__v8df) __C,
3555 (__mmask8) __U, __R);
3558 extern __inline __m512d
3559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3561 __m512d __C, const int __R)
3563 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3564 (__v8df) __B,
3565 -(__v8df) __C,
3566 (__mmask8) __U, __R);
3569 extern __inline __m512
3570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3573 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3574 (__v16sf) __B,
3575 -(__v16sf) __C,
3576 (__mmask16) -1, __R);
3579 extern __inline __m512
3580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3581 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3582 __m512 __C, const int __R)
3584 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3585 (__v16sf) __B,
3586 -(__v16sf) __C,
3587 (__mmask16) __U, __R);
3590 extern __inline __m512
3591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3592 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3593 __mmask16 __U, const int __R)
3595 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3596 (__v16sf) __B,
3597 (__v16sf) __C,
3598 (__mmask16) __U, __R);
3601 extern __inline __m512
3602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3603 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3604 __m512 __C, const int __R)
3606 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3607 (__v16sf) __B,
3608 -(__v16sf) __C,
3609 (__mmask16) __U, __R);
3612 extern __inline __m512d
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3616 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3617 (__v8df) __B,
3618 (__v8df) __C,
3619 (__mmask8) -1, __R);
3622 extern __inline __m512d
3623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3625 __m512d __C, const int __R)
3627 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3628 (__v8df) __B,
3629 (__v8df) __C,
3630 (__mmask8) __U, __R);
3633 extern __inline __m512d
3634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3635 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3636 __mmask8 __U, const int __R)
3638 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
3639 (__v8df) __B,
3640 (__v8df) __C,
3641 (__mmask8) __U, __R);
3644 extern __inline __m512d
3645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3646 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3647 __m512d __C, const int __R)
3649 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
3650 (__v8df) __B,
3651 (__v8df) __C,
3652 (__mmask8) __U, __R);
3655 extern __inline __m512
3656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3659 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3660 (__v16sf) __B,
3661 (__v16sf) __C,
3662 (__mmask16) -1, __R);
3665 extern __inline __m512
3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3668 __m512 __C, const int __R)
3670 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3671 (__v16sf) __B,
3672 (__v16sf) __C,
3673 (__mmask16) __U, __R);
3676 extern __inline __m512
3677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3678 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3679 __mmask16 __U, const int __R)
3681 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
3682 (__v16sf) __B,
3683 (__v16sf) __C,
3684 (__mmask16) __U, __R);
3687 extern __inline __m512
3688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3690 __m512 __C, const int __R)
3692 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
3693 (__v16sf) __B,
3694 (__v16sf) __C,
3695 (__mmask16) __U, __R);
3698 extern __inline __m512d
3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3702 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3703 (__v8df) __B,
3704 (__v8df) __C,
3705 (__mmask8) -1, __R);
3708 extern __inline __m512d
3709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3711 __m512d __C, const int __R)
3713 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3714 (__v8df) __B,
3715 (__v8df) __C,
3716 (__mmask8) __U, __R);
3719 extern __inline __m512d
3720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3721 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3722 __mmask8 __U, const int __R)
3724 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3725 (__v8df) __B,
3726 (__v8df) __C,
3727 (__mmask8) __U, __R);
3730 extern __inline __m512d
3731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3732 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3733 __m512d __C, const int __R)
3735 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
3736 (__v8df) __B,
3737 (__v8df) __C,
3738 (__mmask8) __U, __R);
3741 extern __inline __m512
3742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3743 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3745 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3746 (__v16sf) __B,
3747 (__v16sf) __C,
3748 (__mmask16) -1, __R);
3751 extern __inline __m512
3752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3753 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3754 __m512 __C, const int __R)
3756 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3757 (__v16sf) __B,
3758 (__v16sf) __C,
3759 (__mmask16) __U, __R);
3762 extern __inline __m512
3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3765 __mmask16 __U, const int __R)
3767 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3768 (__v16sf) __B,
3769 (__v16sf) __C,
3770 (__mmask16) __U, __R);
3773 extern __inline __m512
3774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3775 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3776 __m512 __C, const int __R)
3778 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
3779 (__v16sf) __B,
3780 (__v16sf) __C,
3781 (__mmask16) __U, __R);
3783 #else
3784 #define _mm512_fmadd_round_pd(A, B, C, R) \
3785 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3787 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3788 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3790 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3791 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3793 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3794 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3796 #define _mm512_fmadd_round_ps(A, B, C, R) \
3797 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3799 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3800 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3802 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3803 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3805 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3806 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3808 #define _mm512_fmsub_round_pd(A, B, C, R) \
3809 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
3811 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3812 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
3814 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3815 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3817 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3818 (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
3820 #define _mm512_fmsub_round_ps(A, B, C, R) \
3821 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
3823 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3824 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
3826 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3827 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3829 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3830 (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
3832 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3833 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3835 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3836 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3838 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3839 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3841 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3842 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3844 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3845 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3847 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3848 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3850 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3851 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3853 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3854 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3856 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3857 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3859 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3860 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3862 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3863 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3865 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3866 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3868 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3869 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3871 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3872 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3874 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3875 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3877 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3878 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3880 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3881 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R)
3883 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3884 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3886 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3887 (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R)
3889 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3890 (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R)
3892 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3893 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R)
3895 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3896 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3898 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3899 (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R)
3901 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3902 (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
3904 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3905 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
3907 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3908 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3910 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3911 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3913 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3914 (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
3916 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3917 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
3919 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3920 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3922 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3923 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3925 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3926 (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
3927 #endif
3929 extern __inline __m512i
3930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931 _mm512_abs_epi64 (__m512i __A)
3933 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3934 (__v8di)
3935 _mm512_undefined_epi32 (),
3936 (__mmask8) -1);
3939 extern __inline __m512i
3940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3941 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3943 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3944 (__v8di) __W,
3945 (__mmask8) __U);
3948 extern __inline __m512i
3949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3950 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3952 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3953 (__v8di)
3954 _mm512_setzero_si512 (),
3955 (__mmask8) __U);
3958 extern __inline __m512i
3959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3960 _mm512_abs_epi32 (__m512i __A)
3962 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3963 (__v16si)
3964 _mm512_undefined_epi32 (),
3965 (__mmask16) -1);
3968 extern __inline __m512i
3969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3970 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3972 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3973 (__v16si) __W,
3974 (__mmask16) __U);
3977 extern __inline __m512i
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3981 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3982 (__v16si)
3983 _mm512_setzero_si512 (),
3984 (__mmask16) __U);
3987 extern __inline __m512
3988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3989 _mm512_broadcastss_ps (__m128 __A)
3991 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3992 (__v16sf)
3993 _mm512_undefined_ps (),
3994 (__mmask16) -1);
3997 extern __inline __m512
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
4001 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4002 (__v16sf) __O, __M);
4005 extern __inline __m512
4006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4007 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
4009 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
4010 (__v16sf)
4011 _mm512_setzero_ps (),
4012 __M);
/* Broadcast the low double of __A to all 8 lanes (unmasked).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastsd_pd (__m128d __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
						  (__v8df) __O, __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  __M);
}
/* Broadcast the low 32-bit element of __A to all 16 lanes (unmasked).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
						  (__v16si) __O, __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}
/* Replicate the scalar __A (from a GPR) into all 16 32-bit lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi32 (int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
							   (__v16si)
							   _mm512_undefined_epi32 (),
							   (__mmask16)(-1));
}

/* Masked replicate: mask-clear lanes keep __O.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
							   __M);
}

/* Zero-masked replicate: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
{
  return (__m512i)
	 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
						 (__v16si) _mm512_setzero_si512 (),
						 __M);
}
/* Broadcast the low 64-bit element of __A to all 8 lanes (unmasked).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
						  (__v8di) __O, __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}
/* Replicate the scalar __A (from a GPR) into all 8 64-bit lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi64 (long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
							   (__v8di)
							   _mm512_undefined_epi32 (),
							   (__mmask8)(-1));
}

/* Masked replicate: mask-clear lanes keep __O.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
							   __M);
}

/* Zero-masked replicate: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
  return (__m512i)
	 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
						 (__v8di) _mm512_setzero_si512 (),
						 __M);
}
/* Broadcast the 128-bit (4 x float) lane __A into all four 128-bit
   positions of the result (unmasked).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f32x4 (__m128 __A)
{
  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
						     (__v16sf)
						     _mm512_undefined_ps (),
						     (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
						     (__v16sf) __O,
						     __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
{
  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
						     (__v16sf)
						     _mm512_setzero_ps (),
						     __M);
}
/* Broadcast the 128-bit (4 x int) lane __A into all four 128-bit
   positions of the result (unmasked).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i32x4 (__m128i __A)
{
  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
						      (__v16si)
						      _mm512_undefined_epi32 (),
						      (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
						      (__v16si) __O,
						      __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      __M);
}
/* Broadcast the 256-bit (4 x double) value __A into both 256-bit
   halves of the result (unmasked).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x4 (__m256d __A)
{
  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
						      (__v8df)
						      _mm512_undefined_pd (),
						      (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
						      (__v8df) __O,
						      __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
{
  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
						      (__v8df)
						      _mm512_setzero_pd (),
						      __M);
}
/* Broadcast the 256-bit (4 x long long) value __A into both 256-bit
   halves of the result (unmasked).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i64x4 (__m256i __A)
{
  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
						      (__v8di)
						      _mm512_undefined_epi32 (),
						      (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __O.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
						      (__v8di) __O,
						      __M);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
{
  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      __M);
}
/* Selector constants for the 4-way lane shuffles below.  Each letter
   picks a source position (A = element 0 ... D = element 3); the four
   letters map to the four 2-bit fields of the immediate, so e.g.
   _MM_PERM_DCBA == 0xE4 is the identity permutation.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
4361 #ifdef __OPTIMIZE__
/* Shuffle 32-bit elements within each 128-bit lane of __A per the
   2-bit fields of __mask (inline form, requires __OPTIMIZE__ so the
   immediate folds to a constant).  Unmasked variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			   _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
/* Select 128-bit (2 x 64-bit) lanes from __A (low half of result) and
   __B (high half) according to the immediate __imm.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_undefined_epi32 (),
						   (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di) __W,
						   (__mmask8) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   (__mmask8) __U);
}
/* Select 128-bit (4 x 32-bit) lanes from __A (low half of result) and
   __B (high half) according to the immediate __imm.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si)
						   _mm512_undefined_epi32 (),
						   (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si) __W,
						   (__mmask16) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si)
						   _mm512_setzero_si512 (),
						   (__mmask16) __U);
}
/* Select 128-bit (2 x double) lanes from __A (low half of result) and
   __B (high half) according to the immediate __imm.  Unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
			   __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df) __W,
						   (__mmask8) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
			    const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}
/* Select 128-bit (4 x float) lanes from __A (low half of result) and
   __B (high half) according to the immediate __imm.  Unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
			   __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
			    const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}
4534 #else
/* Macro forms of the shuffle intrinsics above, used without
   __OPTIMIZE__ (the immediate cannot be constant-folded from an inline
   function at -O0).  Arguments are cast and parenthesized to accept
   arbitrary expressions.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))

#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_f64x2(X, Y, C)                                   \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_shuffle_f32x4(X, Y, C)                                   \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_undefined_ps(),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                        \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                          \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_setzero_ps(),\
    (__mmask16)(U)))
4621 #endif
/* Per-element variable rotate-left of 16 32-bit ints; rotation counts
   come from the corresponding element of __B.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
/* Per-element variable rotate-right of 16 32-bit ints; counts come
   from the corresponding element of __B.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
/* Per-element variable rotate-left of 8 64-bit ints; counts come from
   the corresponding element of __B.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
/* Per-element variable rotate-right of 8 64-bit ints; counts come
   from the corresponding element of __B.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
4751 #ifdef __OPTIMIZE__
/* Truncating conversions of 8 doubles to 32-bit signed/unsigned ints
   with explicit rounding/SAE control __R (inline forms, require
   __OPTIMIZE__ so __R folds to a constant).  */

/* pd -> epi32, truncate toward zero.  Unmasked.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}

/* pd -> epu32, truncate toward zero.  Unmasked.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_undefined_si256 (),
						      (__mmask8) -1, __R);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si) __W,
						      (__mmask8) __U, __R);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_setzero_si256 (),
						      (__mmask8) __U, __R);
}
4811 #else
/* Macro forms (used without __OPTIMIZE__) of the truncating pd ->
   epi32/epu32 conversions with rounding control.  Unlike the original
   definitions, every argument is parenthesized and cast exactly as in
   the inline forms above, so expression arguments bind correctly and
   wrongly-typed operands are converted rather than passed through.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
	(__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))

#define _mm512_cvtt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
	(__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))
4829 #endif
4831 #ifdef __OPTIMIZE__
/* Rounding conversions of 8 doubles to 32-bit signed/unsigned ints
   with explicit rounding control __R (inline forms, require
   __OPTIMIZE__).  */

/* pd -> epi32 under rounding mode __R.  Unmasked.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_undefined_si256 (),
						    (__mmask8) -1, __R);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si) __W,
						    (__mmask8) __U, __R);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_setzero_si256 (),
						    (__mmask8) __U, __R);
}

/* pd -> epu32 under rounding mode __R.  Unmasked.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}
4891 #else
/* Macro forms (used without __OPTIMIZE__) of the rounding pd ->
   epi32/epu32 conversions.  Arguments are parenthesized and cast to
   match the inline forms above, fixing mis-binding of expression
   arguments in the previous definitions.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
	(__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))

#define _mm512_cvt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_undefined_si256(), (__mmask8)-1, (int)(B)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
	(__v8si)(__m256i)(W), (__mmask8)(U), (int)(B)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
	(__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(B)))
4909 #endif
4911 #ifdef __OPTIMIZE__
/* Truncating conversions of 16 floats to 32-bit signed/unsigned ints
   with explicit rounding/SAE control __R (inline forms, require
   __OPTIMIZE__).  */

/* ps -> epi32, truncate toward zero.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1, __R);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}

/* ps -> epu32, truncate toward zero.  Unmasked.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_undefined_epi32 (),
						      (__mmask16) -1, __R);
}

/* Masked: mask-clear lanes keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si) __W,
						      (__mmask16) __U, __R);
}

/* Zero-masked: mask-clear lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      (__mmask16) __U, __R);
}
4971 #else
4972 #define _mm512_cvtt_roundps_epi32(A, B) \
4973 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4975 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4976 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4978 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4979 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4981 #define _mm512_cvtt_roundps_epu32(A, B) \
4982 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4984 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4985 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4987 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4988 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4989 #endif
/* Conversion of 16 packed floats to 32-bit signed/unsigned integers
   (vcvtps2dq/vcvtps2udq) using rounding mode __R.  Inline forms under
   __OPTIMIZE__, macro forms otherwise (the builtin requires __R to be
   a compile-time constant).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_undefined_epi32 (),
						    (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}
#else
#define _mm512_cvt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
5071 extern __inline __m128d
5072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 _mm_cvtu32_sd (__m128d __A, unsigned __B)
5075 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
/* 64-bit integer -> scalar double conversions with rounding control
   (vcvtusi2sd/vcvtsi2sd, 64-bit operand forms).  Only available on
   x86-64; _mm_cvt_roundi64_sd and _mm_cvt_roundsi64_sd are aliases of
   the same signed conversion.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)

#define _mm_cvt_roundi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
#endif

#endif
/* 32-bit integer -> scalar float conversions with rounding control
   (vcvtusi2ss/vcvtsi2ss).  _mm_cvt_roundi32_ss and
   _mm_cvt_roundsi32_ss are aliases of the same signed conversion.  */
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)

#define _mm_cvt_roundi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
#endif
/* 64-bit integer -> scalar float conversions with rounding control
   (vcvtusi2ss/vcvtsi2ss, 64-bit operand forms).  x86-64 only;
   _mm_cvt_roundsi64_ss and _mm_cvt_roundi64_ss alias the same signed
   conversion.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)

#define _mm_cvt_roundi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
#endif

#endif
5180 extern __inline __m128i
5181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182 _mm512_cvtepi32_epi8 (__m512i __A)
5184 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5185 (__v16qi)
5186 _mm_undefined_si128 (),
5187 (__mmask16) -1);
5190 extern __inline void
5191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5192 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5194 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5197 extern __inline __m128i
5198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5199 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5201 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5202 (__v16qi) __O, __M);
5205 extern __inline __m128i
5206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5207 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5209 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5210 (__v16qi)
5211 _mm_setzero_si128 (),
5212 __M);
5215 extern __inline __m128i
5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217 _mm512_cvtsepi32_epi8 (__m512i __A)
5219 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5220 (__v16qi)
5221 _mm_undefined_si128 (),
5222 (__mmask16) -1);
5225 extern __inline void
5226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5227 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5229 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5232 extern __inline __m128i
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5236 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5237 (__v16qi) __O, __M);
5240 extern __inline __m128i
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5244 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5245 (__v16qi)
5246 _mm_setzero_si128 (),
5247 __M);
5250 extern __inline __m128i
5251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252 _mm512_cvtusepi32_epi8 (__m512i __A)
5254 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5255 (__v16qi)
5256 _mm_undefined_si128 (),
5257 (__mmask16) -1);
5260 extern __inline void
5261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5262 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5264 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5267 extern __inline __m128i
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5271 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5272 (__v16qi) __O,
5273 __M);
5276 extern __inline __m128i
5277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5278 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5280 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5281 (__v16qi)
5282 _mm_setzero_si128 (),
5283 __M);
5286 extern __inline __m256i
5287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5288 _mm512_cvtepi32_epi16 (__m512i __A)
5290 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5291 (__v16hi)
5292 _mm256_undefined_si256 (),
5293 (__mmask16) -1);
5296 extern __inline void
5297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5298 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5300 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5303 extern __inline __m256i
5304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5307 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5308 (__v16hi) __O, __M);
5311 extern __inline __m256i
5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5315 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5316 (__v16hi)
5317 _mm256_setzero_si256 (),
5318 __M);
5321 extern __inline __m256i
5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323 _mm512_cvtsepi32_epi16 (__m512i __A)
5325 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5326 (__v16hi)
5327 _mm256_undefined_si256 (),
5328 (__mmask16) -1);
5331 extern __inline void
5332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5333 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5335 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5338 extern __inline __m256i
5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5342 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5343 (__v16hi) __O, __M);
5346 extern __inline __m256i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5350 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5351 (__v16hi)
5352 _mm256_setzero_si256 (),
5353 __M);
5356 extern __inline __m256i
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_cvtusepi32_epi16 (__m512i __A)
5360 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5361 (__v16hi)
5362 _mm256_undefined_si256 (),
5363 (__mmask16) -1);
5366 extern __inline void
5367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5370 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5373 extern __inline __m256i
5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5377 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5378 (__v16hi) __O,
5379 __M);
5382 extern __inline __m256i
5383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5384 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5386 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5387 (__v16hi)
5388 _mm256_setzero_si256 (),
5389 __M);
5392 extern __inline __m256i
5393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5394 _mm512_cvtepi64_epi32 (__m512i __A)
5396 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5397 (__v8si)
5398 _mm256_undefined_si256 (),
5399 (__mmask8) -1);
5402 extern __inline void
5403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5404 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5406 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5409 extern __inline __m256i
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5413 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5414 (__v8si) __O, __M);
5417 extern __inline __m256i
5418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5419 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5421 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5422 (__v8si)
5423 _mm256_setzero_si256 (),
5424 __M);
5427 extern __inline __m256i
5428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5429 _mm512_cvtsepi64_epi32 (__m512i __A)
5431 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5432 (__v8si)
5433 _mm256_undefined_si256 (),
5434 (__mmask8) -1);
5437 extern __inline void
5438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5439 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5441 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5444 extern __inline __m256i
5445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5446 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5448 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5449 (__v8si) __O, __M);
5452 extern __inline __m256i
5453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5454 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5456 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5457 (__v8si)
5458 _mm256_setzero_si256 (),
5459 __M);
5462 extern __inline __m256i
5463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5464 _mm512_cvtusepi64_epi32 (__m512i __A)
5466 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5467 (__v8si)
5468 _mm256_undefined_si256 (),
5469 (__mmask8) -1);
5472 extern __inline void
5473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5476 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5479 extern __inline __m256i
5480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5481 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5483 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5484 (__v8si) __O, __M);
5487 extern __inline __m256i
5488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5489 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5491 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5492 (__v8si)
5493 _mm256_setzero_si256 (),
5494 __M);
5497 extern __inline __m128i
5498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5499 _mm512_cvtepi64_epi16 (__m512i __A)
5501 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5502 (__v8hi)
5503 _mm_undefined_si128 (),
5504 (__mmask8) -1);
5507 extern __inline void
5508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5511 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5514 extern __inline __m128i
5515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5516 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5518 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5519 (__v8hi) __O, __M);
5522 extern __inline __m128i
5523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5526 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5527 (__v8hi)
5528 _mm_setzero_si128 (),
5529 __M);
5532 extern __inline __m128i
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_cvtsepi64_epi16 (__m512i __A)
5536 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5537 (__v8hi)
5538 _mm_undefined_si128 (),
5539 (__mmask8) -1);
5542 extern __inline void
5543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5544 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5546 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5549 extern __inline __m128i
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5553 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5554 (__v8hi) __O, __M);
5557 extern __inline __m128i
5558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5559 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5561 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5562 (__v8hi)
5563 _mm_setzero_si128 (),
5564 __M);
5567 extern __inline __m128i
5568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569 _mm512_cvtusepi64_epi16 (__m512i __A)
5571 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5572 (__v8hi)
5573 _mm_undefined_si128 (),
5574 (__mmask8) -1);
5577 extern __inline void
5578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5579 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5581 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5584 extern __inline __m128i
5585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5588 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5589 (__v8hi) __O, __M);
5592 extern __inline __m128i
5593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5596 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5597 (__v8hi)
5598 _mm_setzero_si128 (),
5599 __M);
5602 extern __inline __m128i
5603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5604 _mm512_cvtepi64_epi8 (__m512i __A)
5606 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5607 (__v16qi)
5608 _mm_undefined_si128 (),
5609 (__mmask8) -1);
5612 extern __inline void
5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5616 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5619 extern __inline __m128i
5620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5621 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5623 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5624 (__v16qi) __O, __M);
5627 extern __inline __m128i
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5631 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5632 (__v16qi)
5633 _mm_setzero_si128 (),
5634 __M);
5637 extern __inline __m128i
5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639 _mm512_cvtsepi64_epi8 (__m512i __A)
5641 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5642 (__v16qi)
5643 _mm_undefined_si128 (),
5644 (__mmask8) -1);
5647 extern __inline void
5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5649 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5651 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5654 extern __inline __m128i
5655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5658 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5659 (__v16qi) __O, __M);
5662 extern __inline __m128i
5663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5664 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5666 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5667 (__v16qi)
5668 _mm_setzero_si128 (),
5669 __M);
5672 extern __inline __m128i
5673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5674 _mm512_cvtusepi64_epi8 (__m512i __A)
5676 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5677 (__v16qi)
5678 _mm_undefined_si128 (),
5679 (__mmask8) -1);
5682 extern __inline void
5683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5684 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5686 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5689 extern __inline __m128i
5690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5693 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5694 (__v16qi) __O,
5695 __M);
5698 extern __inline __m128i
5699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5700 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5702 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5703 (__v16qi)
5704 _mm_setzero_si128 (),
5705 __M);
5708 extern __inline __m512d
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm512_cvtepi32_pd (__m256i __A)
5712 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5713 (__v8df)
5714 _mm512_undefined_pd (),
5715 (__mmask8) -1);
5718 extern __inline __m512d
5719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5720 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5722 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5723 (__v8df) __W,
5724 (__mmask8) __U);
5727 extern __inline __m512d
5728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5731 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5732 (__v8df)
5733 _mm512_setzero_pd (),
5734 (__mmask8) __U);
5737 extern __inline __m512d
5738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739 _mm512_cvtepu32_pd (__m256i __A)
5741 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5742 (__v8df)
5743 _mm512_undefined_pd (),
5744 (__mmask8) -1);
5747 extern __inline __m512d
5748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5751 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5752 (__v8df) __W,
5753 (__mmask8) __U);
5756 extern __inline __m512d
5757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5760 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5761 (__v8df)
5762 _mm512_setzero_pd (),
5763 (__mmask8) __U);
/* Convert 16 packed 32-bit signed (vcvtdq2ps) or unsigned
   (vcvtudq2ps) integers to floats using rounding mode __R.  Inline
   forms under __OPTIMIZE__, macro forms otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}
#else
#define _mm512_cvt_roundepi32_ps(A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundepu32_ps(A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#endif
5847 #ifdef __OPTIMIZE__
5848 extern __inline __m256d
5849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5850 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5852 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5853 __imm,
5854 (__v4df)
5855 _mm256_undefined_pd (),
5856 (__mmask8) -1);
5859 extern __inline __m256d
5860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5861 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5862 const int __imm)
5864 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5865 __imm,
5866 (__v4df) __W,
5867 (__mmask8) __U);
5870 extern __inline __m256d
5871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5872 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5874 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5875 __imm,
5876 (__v4df)
5877 _mm256_setzero_pd (),
5878 (__mmask8) __U);
5881 extern __inline __m128
5882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5883 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5885 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5886 __imm,
5887 (__v4sf)
5888 _mm_undefined_ps (),
5889 (__mmask8) -1);
5892 extern __inline __m128
5893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5894 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5895 const int __imm)
5897 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5898 __imm,
5899 (__v4sf) __W,
5900 (__mmask8) __U);
5903 extern __inline __m128
5904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5907 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5908 __imm,
5909 (__v4sf)
5910 _mm_setzero_ps (),
5911 (__mmask8) __U);
5914 extern __inline __m256i
5915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5916 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5918 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5919 __imm,
5920 (__v4di)
5921 _mm256_undefined_si256 (),
5922 (__mmask8) -1);
5925 extern __inline __m256i
5926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5927 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5928 const int __imm)
5930 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5931 __imm,
5932 (__v4di) __W,
5933 (__mmask8) __U);
5936 extern __inline __m256i
5937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5938 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5940 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5941 __imm,
5942 (__v4di)
5943 _mm256_setzero_si256 (),
5944 (__mmask8) __U);
5947 extern __inline __m128i
5948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5951 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5952 __imm,
5953 (__v4si)
5954 _mm_undefined_si128 (),
5955 (__mmask8) -1);
5958 extern __inline __m128i
5959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5960 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5961 const int __imm)
5963 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5964 __imm,
5965 (__v4si) __W,
5966 (__mmask8) __U);
5969 extern __inline __m128i
5970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5971 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5973 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5974 __imm,
5975 (__v4si)
5976 _mm_setzero_si128 (),
5977 (__mmask8) __U);
#else
/* Without __OPTIMIZE__ the extract intrinsics must be macros so the lane
   selector C reaches the builtin as a literal immediate; the inline forms
   above would pass it through a (non-constant-folded) parameter.  */

#define _mm512_extractf64x4_pd(X, C)                                    \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)_mm256_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)_mm256_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_extractf32x4_ps(X, C)                                    \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)_mm_undefined_ps(),\
    (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)_mm_setzero_ps(),\
    (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(X, C)                                 \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)_mm256_undefined_si256 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)_mm256_setzero_si256 (),\
    (__mmask8)(U)))

#define _mm512_extracti32x4_epi32(X, C)                                 \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)_mm_undefined_si128 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)_mm_setzero_si128 (),\
    (__mmask8)(U)))
#endif
6054 #ifdef __OPTIMIZE__
6055 extern __inline __m512i
6056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
6059 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
6060 (__v4si) __B,
6061 __imm,
6062 (__v16si) __A, -1);
6065 extern __inline __m512
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
6069 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
6070 (__v4sf) __B,
6071 __imm,
6072 (__v16sf) __A, -1);
6075 extern __inline __m512i
6076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6079 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6080 (__v4di) __B,
6081 __imm,
6082 (__v8di)
6083 _mm512_undefined_epi32 (),
6084 (__mmask8) -1);
6087 extern __inline __m512i
6088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6090 __m256i __B, const int __imm)
6092 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6093 (__v4di) __B,
6094 __imm,
6095 (__v8di) __W,
6096 (__mmask8) __U);
6099 extern __inline __m512i
6100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6102 const int __imm)
6104 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6105 (__v4di) __B,
6106 __imm,
6107 (__v8di)
6108 _mm512_setzero_si512 (),
6109 (__mmask8) __U);
6112 extern __inline __m512d
6113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6114 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6116 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6117 (__v4df) __B,
6118 __imm,
6119 (__v8df)
6120 _mm512_undefined_pd (),
6121 (__mmask8) -1);
6124 extern __inline __m512d
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6127 __m256d __B, const int __imm)
6129 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6130 (__v4df) __B,
6131 __imm,
6132 (__v8df) __W,
6133 (__mmask8) __U);
6136 extern __inline __m512d
6137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6138 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6139 const int __imm)
6141 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6142 (__v4df) __B,
6143 __imm,
6144 (__v8df)
6145 _mm512_setzero_pd (),
6146 (__mmask8) __U);
#else
/* Non-__OPTIMIZE__ macro forms of the insert intrinsics: keep the lane
   selector C a literal immediate for the builtin.  Note the 32x4 forms
   pass X itself as the merge operand with an all-ones mask.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))

#define _mm512_insertf64x4(X, Y, C)                                     \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
    (__v4df)(__m256d) (Y), (int) (C),                                   \
    (__v8df)(__m512d)_mm512_undefined_pd(),                             \
    (__mmask8)-1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
    (__v4df)(__m256d) (Y), (int) (C),                                   \
    (__v8df)(__m512d)(W),                                               \
    (__mmask8)(U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
    (__v4df)(__m256d) (Y), (int) (C),                                   \
    (__v8df)(__m512d)_mm512_setzero_pd(),                               \
    (__mmask8)(U)))

#define _mm512_inserti64x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
    (__v4di)(__m256i) (Y), (int) (C),                                   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),                         \
    (__mmask8)-1))

#define _mm512_mask_inserti64x4(W, U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
    (__v4di)(__m256i) (Y), (int) (C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_inserti64x4(U, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
    (__v4di)(__m256i) (Y), (int) (C),                                   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)(U)))
#endif
6194 extern __inline __m512d
6195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196 _mm512_loadu_pd (void const *__P)
6198 return *(__m512d_u *)__P;
6201 extern __inline __m512d
6202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6205 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6206 (__v8df) __W,
6207 (__mmask8) __U);
6210 extern __inline __m512d
6211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6212 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6214 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6215 (__v8df)
6216 _mm512_setzero_pd (),
6217 (__mmask8) __U);
6220 extern __inline void
6221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6222 _mm512_storeu_pd (void *__P, __m512d __A)
6224 *(__m512d_u *)__P = __A;
6227 extern __inline void
6228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6229 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6231 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
6232 (__mmask8) __U);
6235 extern __inline __m512
6236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6237 _mm512_loadu_ps (void const *__P)
6239 return *(__m512_u *)__P;
6242 extern __inline __m512
6243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6246 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6247 (__v16sf) __W,
6248 (__mmask16) __U);
6251 extern __inline __m512
6252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6255 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6256 (__v16sf)
6257 _mm512_setzero_ps (),
6258 (__mmask16) __U);
6261 extern __inline void
6262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263 _mm512_storeu_ps (void *__P, __m512 __A)
6265 *(__m512_u *)__P = __A;
6268 extern __inline void
6269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6272 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
6273 (__mmask16) __U);
6276 extern __inline __m512i
6277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6278 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6280 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6281 (__v8di) __W,
6282 (__mmask8) __U);
6285 extern __inline __m512i
6286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6287 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6289 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6290 (__v8di)
6291 _mm512_setzero_si512 (),
6292 (__mmask8) __U);
6295 extern __inline void
6296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6297 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6299 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
6300 (__mmask8) __U);
6303 extern __inline __m512i
6304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6305 _mm512_loadu_si512 (void const *__P)
6307 return *(__m512i_u *)__P;
6310 extern __inline __m512i
6311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6314 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6315 (__v16si) __W,
6316 (__mmask16) __U);
6319 extern __inline __m512i
6320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6321 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6323 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6324 (__v16si)
6325 _mm512_setzero_si512 (),
6326 (__mmask16) __U);
6329 extern __inline void
6330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6331 _mm512_storeu_si512 (void *__P, __m512i __A)
6333 *(__m512i_u *)__P = __A;
6336 extern __inline void
6337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6338 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6340 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
6341 (__mmask16) __U);
6344 extern __inline __m512d
6345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346 _mm512_permutevar_pd (__m512d __A, __m512i __C)
6348 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6349 (__v8di) __C,
6350 (__v8df)
6351 _mm512_undefined_pd (),
6352 (__mmask8) -1);
6355 extern __inline __m512d
6356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6359 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6360 (__v8di) __C,
6361 (__v8df) __W,
6362 (__mmask8) __U);
6365 extern __inline __m512d
6366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6367 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6369 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6370 (__v8di) __C,
6371 (__v8df)
6372 _mm512_setzero_pd (),
6373 (__mmask8) __U);
6376 extern __inline __m512
6377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6378 _mm512_permutevar_ps (__m512 __A, __m512i __C)
6380 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6381 (__v16si) __C,
6382 (__v16sf)
6383 _mm512_undefined_ps (),
6384 (__mmask16) -1);
6387 extern __inline __m512
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6391 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6392 (__v16si) __C,
6393 (__v16sf) __W,
6394 (__mmask16) __U);
6397 extern __inline __m512
6398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6401 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6402 (__v16si) __C,
6403 (__v16sf)
6404 _mm512_setzero_ps (),
6405 (__mmask16) __U);
6408 extern __inline __m512i
6409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6412 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6413 /* idx */ ,
6414 (__v8di) __A,
6415 (__v8di) __B,
6416 (__mmask8) -1);
6419 extern __inline __m512i
6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6422 __m512i __B)
6424 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6425 /* idx */ ,
6426 (__v8di) __A,
6427 (__v8di) __B,
6428 (__mmask8) __U);
6431 extern __inline __m512i
6432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6433 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6434 __mmask8 __U, __m512i __B)
6436 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6437 (__v8di) __I
6438 /* idx */ ,
6439 (__v8di) __B,
6440 (__mmask8) __U);
6443 extern __inline __m512i
6444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6446 __m512i __I, __m512i __B)
6448 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6449 /* idx */ ,
6450 (__v8di) __A,
6451 (__v8di) __B,
6452 (__mmask8) __U);
6455 extern __inline __m512i
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6459 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6460 /* idx */ ,
6461 (__v16si) __A,
6462 (__v16si) __B,
6463 (__mmask16) -1);
6466 extern __inline __m512i
6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6469 __m512i __I, __m512i __B)
6471 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6472 /* idx */ ,
6473 (__v16si) __A,
6474 (__v16si) __B,
6475 (__mmask16) __U);
6478 extern __inline __m512i
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6481 __mmask16 __U, __m512i __B)
6483 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6484 (__v16si) __I
6485 /* idx */ ,
6486 (__v16si) __B,
6487 (__mmask16) __U);
6490 extern __inline __m512i
6491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6493 __m512i __I, __m512i __B)
6495 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6496 /* idx */ ,
6497 (__v16si) __A,
6498 (__v16si) __B,
6499 (__mmask16) __U);
6502 extern __inline __m512d
6503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6506 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6507 /* idx */ ,
6508 (__v8df) __A,
6509 (__v8df) __B,
6510 (__mmask8) -1);
6513 extern __inline __m512d
6514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6515 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6516 __m512d __B)
6518 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6519 /* idx */ ,
6520 (__v8df) __A,
6521 (__v8df) __B,
6522 (__mmask8) __U);
6525 extern __inline __m512d
6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6527 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6528 __m512d __B)
6530 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6531 (__v8di) __I
6532 /* idx */ ,
6533 (__v8df) __B,
6534 (__mmask8) __U);
6537 extern __inline __m512d
6538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6539 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6540 __m512d __B)
6542 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6543 /* idx */ ,
6544 (__v8df) __A,
6545 (__v8df) __B,
6546 (__mmask8) __U);
6549 extern __inline __m512
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6553 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6554 /* idx */ ,
6555 (__v16sf) __A,
6556 (__v16sf) __B,
6557 (__mmask16) -1);
6560 extern __inline __m512
6561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6564 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6565 /* idx */ ,
6566 (__v16sf) __A,
6567 (__v16sf) __B,
6568 (__mmask16) __U);
6571 extern __inline __m512
6572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6573 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6574 __m512 __B)
6576 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6577 (__v16si) __I
6578 /* idx */ ,
6579 (__v16sf) __B,
6580 (__mmask16) __U);
6583 extern __inline __m512
6584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6585 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6586 __m512 __B)
6588 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6589 /* idx */ ,
6590 (__v16sf) __A,
6591 (__v16sf) __B,
6592 (__mmask16) __U);
6595 #ifdef __OPTIMIZE__
6596 extern __inline __m512d
6597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6598 _mm512_permute_pd (__m512d __X, const int __C)
6600 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6601 (__v8df)
6602 _mm512_undefined_pd (),
6603 (__mmask8) -1);
6606 extern __inline __m512d
6607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6608 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6610 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6611 (__v8df) __W,
6612 (__mmask8) __U);
6615 extern __inline __m512d
6616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6617 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6619 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6620 (__v8df)
6621 _mm512_setzero_pd (),
6622 (__mmask8) __U);
6625 extern __inline __m512
6626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6627 _mm512_permute_ps (__m512 __X, const int __C)
6629 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6630 (__v16sf)
6631 _mm512_undefined_ps (),
6632 (__mmask16) -1);
6635 extern __inline __m512
6636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6637 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6639 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6640 (__v16sf) __W,
6641 (__mmask16) __U);
6644 extern __inline __m512
6645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6646 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6648 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6649 (__v16sf)
6650 _mm512_setzero_ps (),
6651 (__mmask16) __U);
#else
/* Non-__OPTIMIZE__ macro forms of the immediate-selector permutes, so C
   stays a literal immediate for the builtin.  */
#define _mm512_permute_pd(X, C)                                               \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),\
					      (__v8df)(__m512d)_mm512_undefined_pd(),\
					      (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C)                                    \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),\
					      (__v8df)(__m512d)(W),           \
					      (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C)                                      \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),\
					      (__v8df)(__m512d)_mm512_setzero_pd(), \
					      (__mmask8)(U)))

#define _mm512_permute_ps(X, C)                                               \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
					     (__v16sf)(__m512)_mm512_undefined_ps(),\
					     (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C)                                    \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
					     (__v16sf)(__m512)(W),            \
					     (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C)                                      \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
					     (__v16sf)(__m512)_mm512_setzero_ps(), \
					     (__mmask16)(U)))
#endif
6685 #ifdef __OPTIMIZE__
6686 extern __inline __m512i
6687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6688 _mm512_permutex_epi64 (__m512i __X, const int __I)
6690 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6691 (__v8di)
6692 _mm512_undefined_epi32 (),
6693 (__mmask8) (-1));
6696 extern __inline __m512i
6697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6698 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6699 __m512i __X, const int __I)
6701 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6702 (__v8di) __W,
6703 (__mmask8) __M);
6706 extern __inline __m512i
6707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6708 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6710 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6711 (__v8di)
6712 _mm512_setzero_si512 (),
6713 (__mmask8) __M);
6716 extern __inline __m512d
6717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6718 _mm512_permutex_pd (__m512d __X, const int __M)
6720 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6721 (__v8df)
6722 _mm512_undefined_pd (),
6723 (__mmask8) -1);
6726 extern __inline __m512d
6727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6728 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6730 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6731 (__v8df) __W,
6732 (__mmask8) __U);
6735 extern __inline __m512d
6736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6737 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6739 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6740 (__v8df)
6741 _mm512_setzero_pd (),
6742 (__mmask8) __U);
#else
/* Non-__OPTIMIZE__ macro forms of the immediate cross-lane permutes.  */
#define _mm512_permutex_pd(X, M)                                            \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
					    (__v8df)(__m512d)_mm512_undefined_pd(),\
					    (__mmask8)-1))

#define _mm512_mask_permutex_pd(W, U, X, M)                                 \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
					    (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M)                                   \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
					    (__v8df)(__m512d)_mm512_setzero_pd(),\
					    (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I)                       \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
					    (int)(I),             \
					    (__v8di)(__m512i)     \
					    (_mm512_undefined_epi32 ()),\
					    (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I)              \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
					    (int)(I),             \
					    (__v8di)(__m512i)     \
					    (_mm512_setzero_si512 ()),\
					    (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I)            \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
					    (int)(I),             \
					    (__v8di)(__m512i)(W), \
					    (__mmask8)(M)))
#endif
6780 extern __inline __m512i
6781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6784 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6785 (__v8di) __X,
6786 (__v8di)
6787 _mm512_setzero_si512 (),
6788 __M);
6791 extern __inline __m512i
6792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6793 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6795 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6796 (__v8di) __X,
6797 (__v8di)
6798 _mm512_undefined_epi32 (),
6799 (__mmask8) -1);
6802 extern __inline __m512i
6803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6804 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6805 __m512i __Y)
6807 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6808 (__v8di) __X,
6809 (__v8di) __W,
6810 __M);
6813 extern __inline __m512i
6814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6817 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6818 (__v16si) __X,
6819 (__v16si)
6820 _mm512_setzero_si512 (),
6821 __M);
6824 extern __inline __m512i
6825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6828 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6829 (__v16si) __X,
6830 (__v16si)
6831 _mm512_undefined_epi32 (),
6832 (__mmask16) -1);
6835 extern __inline __m512i
6836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6837 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6838 __m512i __Y)
6840 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6841 (__v16si) __X,
6842 (__v16si) __W,
6843 __M);
6846 extern __inline __m512d
6847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6848 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6850 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6851 (__v8di) __X,
6852 (__v8df)
6853 _mm512_undefined_pd (),
6854 (__mmask8) -1);
6857 extern __inline __m512d
6858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6859 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6861 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6862 (__v8di) __X,
6863 (__v8df) __W,
6864 (__mmask8) __U);
6867 extern __inline __m512d
6868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6869 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6871 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6872 (__v8di) __X,
6873 (__v8df)
6874 _mm512_setzero_pd (),
6875 (__mmask8) __U);
6878 extern __inline __m512
6879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6880 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6882 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6883 (__v16si) __X,
6884 (__v16sf)
6885 _mm512_undefined_ps (),
6886 (__mmask16) -1);
6889 extern __inline __m512
6890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6891 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6893 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6894 (__v16si) __X,
6895 (__v16sf) __W,
6896 (__mmask16) __U);
6899 extern __inline __m512
6900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6901 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6903 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6904 (__v16si) __X,
6905 (__v16sf)
6906 _mm512_setzero_ps (),
6907 (__mmask16) __U);
6910 #ifdef __OPTIMIZE__
6911 extern __inline __m512
6912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6913 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6915 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6916 (__v16sf) __V, __imm,
6917 (__v16sf)
6918 _mm512_undefined_ps (),
6919 (__mmask16) -1);
6922 extern __inline __m512
6923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6924 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6925 __m512 __V, const int __imm)
6927 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6928 (__v16sf) __V, __imm,
6929 (__v16sf) __W,
6930 (__mmask16) __U);
6933 extern __inline __m512
6934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6935 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6937 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6938 (__v16sf) __V, __imm,
6939 (__v16sf)
6940 _mm512_setzero_ps (),
6941 (__mmask16) __U);
6944 extern __inline __m512d
6945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6946 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6948 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6949 (__v8df) __V, __imm,
6950 (__v8df)
6951 _mm512_undefined_pd (),
6952 (__mmask8) -1);
6955 extern __inline __m512d
6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6958 __m512d __V, const int __imm)
6960 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6961 (__v8df) __V, __imm,
6962 (__v8df) __W,
6963 (__mmask8) __U);
6966 extern __inline __m512d
6967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6968 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6969 const int __imm)
6971 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6972 (__v8df) __V, __imm,
6973 (__v8df)
6974 _mm512_setzero_pd (),
6975 (__mmask8) __U);
6978 extern __inline __m512d
6979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6980 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6981 const int __imm, const int __R)
6983 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6984 (__v8df) __B,
6985 (__v8di) __C,
6986 __imm,
6987 (__mmask8) -1, __R);
6990 extern __inline __m512d
6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6993 __m512i __C, const int __imm, const int __R)
6995 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6996 (__v8df) __B,
6997 (__v8di) __C,
6998 __imm,
6999 (__mmask8) __U, __R);
7002 extern __inline __m512d
7003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7004 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
7005 __m512i __C, const int __imm, const int __R)
7007 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
7008 (__v8df) __B,
7009 (__v8di) __C,
7010 __imm,
7011 (__mmask8) __U, __R);
7014 extern __inline __m512
7015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7016 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
7017 const int __imm, const int __R)
7019 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7020 (__v16sf) __B,
7021 (__v16si) __C,
7022 __imm,
7023 (__mmask16) -1, __R);
7026 extern __inline __m512
7027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7028 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7029 __m512i __C, const int __imm, const int __R)
7031 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
7032 (__v16sf) __B,
7033 (__v16si) __C,
7034 __imm,
7035 (__mmask16) __U, __R);
7038 extern __inline __m512
7039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7040 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7041 __m512i __C, const int __imm, const int __R)
7043 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
7044 (__v16sf) __B,
7045 (__v16si) __C,
7046 __imm,
7047 (__mmask16) __U, __R);
7050 extern __inline __m128d
7051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7052 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
7053 const int __imm, const int __R)
7055 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7056 (__v2df) __B,
7057 (__v2di) __C, __imm,
7058 (__mmask8) -1, __R);
7061 extern __inline __m128d
7062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7063 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
7064 __m128i __C, const int __imm, const int __R)
7066 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
7067 (__v2df) __B,
7068 (__v2di) __C, __imm,
7069 (__mmask8) __U, __R);
7072 extern __inline __m128d
7073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7074 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7075 __m128i __C, const int __imm, const int __R)
7077 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
7078 (__v2df) __B,
7079 (__v2di) __C,
7080 __imm,
7081 (__mmask8) __U, __R);
7084 extern __inline __m128
7085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7086 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
7087 const int __imm, const int __R)
7089 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7090 (__v4sf) __B,
7091 (__v4si) __C, __imm,
7092 (__mmask8) -1, __R);
7095 extern __inline __m128
7096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7097 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7098 __m128i __C, const int __imm, const int __R)
7100 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7101 (__v4sf) __B,
7102 (__v4si) __C, __imm,
7103 (__mmask8) __U, __R);
7106 extern __inline __m128
7107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7108 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7109 __m128i __C, const int __imm, const int __R)
7111 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
7112 (__v4sf) __B,
7113 (__v4si) __C, __imm,
7114 (__mmask8) __U, __R);
7117 #else
7118 #define _mm512_shuffle_pd(X, Y, C) \
7119 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7120 (__v8df)(__m512d)(Y), (int)(C),\
7121 (__v8df)(__m512d)_mm512_undefined_pd(),\
7122 (__mmask8)-1))
7124 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
7125 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7126 (__v8df)(__m512d)(Y), (int)(C),\
7127 (__v8df)(__m512d)(W),\
7128 (__mmask8)(U)))
7130 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
7131 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7132 (__v8df)(__m512d)(Y), (int)(C),\
7133 (__v8df)(__m512d)_mm512_setzero_pd(),\
7134 (__mmask8)(U)))
7136 #define _mm512_shuffle_ps(X, Y, C) \
7137 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7138 (__v16sf)(__m512)(Y), (int)(C),\
7139 (__v16sf)(__m512)_mm512_undefined_ps(),\
7140 (__mmask16)-1))
7142 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
7143 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7144 (__v16sf)(__m512)(Y), (int)(C),\
7145 (__v16sf)(__m512)(W),\
7146 (__mmask16)(U)))
7148 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
7149 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7150 (__v16sf)(__m512)(Y), (int)(C),\
7151 (__v16sf)(__m512)_mm512_setzero_ps(),\
7152 (__mmask16)(U)))
7154 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
7155 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
7156 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7157 (__mmask8)(-1), (R)))
7159 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
7160 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
7161 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7162 (__mmask8)(U), (R)))
7164 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
7165 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
7166 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7167 (__mmask8)(U), (R)))
7169 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
7170 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
7171 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7172 (__mmask16)(-1), (R)))
7174 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
7175 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
7176 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7177 (__mmask16)(U), (R)))
7179 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
7180 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
7181 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7182 (__mmask16)(U), (R)))
7184 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
7185 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7186 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7187 (__mmask8)(-1), (R)))
7189 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
7190 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7191 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7192 (__mmask8)(U), (R)))
7194 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
7195 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
7196 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7197 (__mmask8)(U), (R)))
7199 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
7200 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7201 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7202 (__mmask8)(-1), (R)))
7204 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
7205 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7206 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7207 (__mmask8)(U), (R)))
7209 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
7210 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
7211 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7212 (__mmask8)(U), (R)))
7213 #endif
7215 extern __inline __m512
7216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7217 _mm512_movehdup_ps (__m512 __A)
7219 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7220 (__v16sf)
7221 _mm512_undefined_ps (),
7222 (__mmask16) -1);
7225 extern __inline __m512
7226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7227 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7229 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7230 (__v16sf) __W,
7231 (__mmask16) __U);
7234 extern __inline __m512
7235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7236 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7238 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7239 (__v16sf)
7240 _mm512_setzero_ps (),
7241 (__mmask16) __U);
7244 extern __inline __m512
7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7246 _mm512_moveldup_ps (__m512 __A)
7248 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7249 (__v16sf)
7250 _mm512_undefined_ps (),
7251 (__mmask16) -1);
7254 extern __inline __m512
7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7258 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7259 (__v16sf) __W,
7260 (__mmask16) __U);
7263 extern __inline __m512
7264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7267 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7268 (__v16sf)
7269 _mm512_setzero_ps (),
7270 (__mmask16) __U);
7273 extern __inline __m512i
7274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7275 _mm512_or_si512 (__m512i __A, __m512i __B)
7277 return (__m512i) ((__v16su) __A | (__v16su) __B);
7280 extern __inline __m512i
7281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282 _mm512_or_epi32 (__m512i __A, __m512i __B)
7284 return (__m512i) ((__v16su) __A | (__v16su) __B);
7287 extern __inline __m512i
7288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7289 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7291 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7292 (__v16si) __B,
7293 (__v16si) __W,
7294 (__mmask16) __U);
7297 extern __inline __m512i
7298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7299 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7301 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7302 (__v16si) __B,
7303 (__v16si)
7304 _mm512_setzero_si512 (),
7305 (__mmask16) __U);
7308 extern __inline __m512i
7309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7310 _mm512_or_epi64 (__m512i __A, __m512i __B)
7312 return (__m512i) ((__v8du) __A | (__v8du) __B);
7315 extern __inline __m512i
7316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7317 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7319 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7320 (__v8di) __B,
7321 (__v8di) __W,
7322 (__mmask8) __U);
7325 extern __inline __m512i
7326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7329 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7330 (__v8di) __B,
7331 (__v8di)
7332 _mm512_setzero_si512 (),
7333 (__mmask8) __U);
7336 extern __inline __m512i
7337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7338 _mm512_xor_si512 (__m512i __A, __m512i __B)
7340 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7343 extern __inline __m512i
7344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7345 _mm512_xor_epi32 (__m512i __A, __m512i __B)
7347 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7350 extern __inline __m512i
7351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7352 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7354 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7355 (__v16si) __B,
7356 (__v16si) __W,
7357 (__mmask16) __U);
7360 extern __inline __m512i
7361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7362 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7364 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7365 (__v16si) __B,
7366 (__v16si)
7367 _mm512_setzero_si512 (),
7368 (__mmask16) __U);
7371 extern __inline __m512i
7372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7373 _mm512_xor_epi64 (__m512i __A, __m512i __B)
7375 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
7378 extern __inline __m512i
7379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7380 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7382 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7383 (__v8di) __B,
7384 (__v8di) __W,
7385 (__mmask8) __U);
7388 extern __inline __m512i
7389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7392 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7393 (__v8di) __B,
7394 (__v8di)
7395 _mm512_setzero_si512 (),
7396 (__mmask8) __U);
7399 #ifdef __OPTIMIZE__
7400 extern __inline __m512i
7401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402 _mm512_rol_epi32 (__m512i __A, const int __B)
7404 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7405 (__v16si)
7406 _mm512_undefined_epi32 (),
7407 (__mmask16) -1);
7410 extern __inline __m512i
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7414 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7415 (__v16si) __W,
7416 (__mmask16) __U);
7419 extern __inline __m512i
7420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7421 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7423 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7424 (__v16si)
7425 _mm512_setzero_si512 (),
7426 (__mmask16) __U);
7429 extern __inline __m512i
7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431 _mm512_ror_epi32 (__m512i __A, int __B)
7433 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7434 (__v16si)
7435 _mm512_undefined_epi32 (),
7436 (__mmask16) -1);
7439 extern __inline __m512i
7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7443 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7444 (__v16si) __W,
7445 (__mmask16) __U);
7448 extern __inline __m512i
7449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7452 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7453 (__v16si)
7454 _mm512_setzero_si512 (),
7455 (__mmask16) __U);
7458 extern __inline __m512i
7459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7460 _mm512_rol_epi64 (__m512i __A, const int __B)
7462 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7463 (__v8di)
7464 _mm512_undefined_epi32 (),
7465 (__mmask8) -1);
7468 extern __inline __m512i
7469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7470 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7472 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7473 (__v8di) __W,
7474 (__mmask8) __U);
7477 extern __inline __m512i
7478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7479 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7481 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7482 (__v8di)
7483 _mm512_setzero_si512 (),
7484 (__mmask8) __U);
7487 extern __inline __m512i
7488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489 _mm512_ror_epi64 (__m512i __A, int __B)
7491 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7492 (__v8di)
7493 _mm512_undefined_epi32 (),
7494 (__mmask8) -1);
7497 extern __inline __m512i
7498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7499 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7501 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7502 (__v8di) __W,
7503 (__mmask8) __U);
7506 extern __inline __m512i
7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7510 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7511 (__v8di)
7512 _mm512_setzero_si512 (),
7513 (__mmask8) __U);
7516 #else
7517 #define _mm512_rol_epi32(A, B) \
7518 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7519 (int)(B), \
7520 (__v16si)_mm512_undefined_epi32 (), \
7521 (__mmask16)(-1)))
7522 #define _mm512_mask_rol_epi32(W, U, A, B) \
7523 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7524 (int)(B), \
7525 (__v16si)(__m512i)(W), \
7526 (__mmask16)(U)))
7527 #define _mm512_maskz_rol_epi32(U, A, B) \
7528 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7529 (int)(B), \
7530 (__v16si)_mm512_setzero_si512 (), \
7531 (__mmask16)(U)))
7532 #define _mm512_ror_epi32(A, B) \
7533 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7534 (int)(B), \
7535 (__v16si)_mm512_undefined_epi32 (), \
7536 (__mmask16)(-1)))
7537 #define _mm512_mask_ror_epi32(W, U, A, B) \
7538 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7539 (int)(B), \
7540 (__v16si)(__m512i)(W), \
7541 (__mmask16)(U)))
7542 #define _mm512_maskz_ror_epi32(U, A, B) \
7543 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7544 (int)(B), \
7545 (__v16si)_mm512_setzero_si512 (), \
7546 (__mmask16)(U)))
7547 #define _mm512_rol_epi64(A, B) \
7548 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7549 (int)(B), \
7550 (__v8di)_mm512_undefined_epi32 (), \
7551 (__mmask8)(-1)))
7552 #define _mm512_mask_rol_epi64(W, U, A, B) \
7553 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7554 (int)(B), \
7555 (__v8di)(__m512i)(W), \
7556 (__mmask8)(U)))
7557 #define _mm512_maskz_rol_epi64(U, A, B) \
7558 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7559 (int)(B), \
7560 (__v8di)_mm512_setzero_si512 (), \
7561 (__mmask8)(U)))
7563 #define _mm512_ror_epi64(A, B) \
7564 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7565 (int)(B), \
7566 (__v8di)_mm512_undefined_epi32 (), \
7567 (__mmask8)(-1)))
7568 #define _mm512_mask_ror_epi64(W, U, A, B) \
7569 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7570 (int)(B), \
7571 (__v8di)(__m512i)(W), \
7572 (__mmask8)(U)))
7573 #define _mm512_maskz_ror_epi64(U, A, B) \
7574 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7575 (int)(B), \
7576 (__v8di)_mm512_setzero_si512 (), \
7577 (__mmask8)(U)))
7578 #endif
7580 extern __inline __m512i
7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582 _mm512_and_si512 (__m512i __A, __m512i __B)
7584 return (__m512i) ((__v16su) __A & (__v16su) __B);
7587 extern __inline __m512i
7588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7589 _mm512_and_epi32 (__m512i __A, __m512i __B)
7591 return (__m512i) ((__v16su) __A & (__v16su) __B);
7594 extern __inline __m512i
7595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7596 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7598 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7599 (__v16si) __B,
7600 (__v16si) __W,
7601 (__mmask16) __U);
7604 extern __inline __m512i
7605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7606 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7608 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7609 (__v16si) __B,
7610 (__v16si)
7611 _mm512_setzero_si512 (),
7612 (__mmask16) __U);
7615 extern __inline __m512i
7616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7617 _mm512_and_epi64 (__m512i __A, __m512i __B)
7619 return (__m512i) ((__v8du) __A & (__v8du) __B);
7622 extern __inline __m512i
7623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7626 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7627 (__v8di) __B,
7628 (__v8di) __W, __U);
7631 extern __inline __m512i
7632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7633 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7635 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7636 (__v8di) __B,
7637 (__v8di)
7638 _mm512_setzero_pd (),
7639 __U);
7642 extern __inline __m512i
7643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7644 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7646 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7647 (__v16si) __B,
7648 (__v16si)
7649 _mm512_undefined_epi32 (),
7650 (__mmask16) -1);
7653 extern __inline __m512i
7654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7655 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7657 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7658 (__v16si) __B,
7659 (__v16si)
7660 _mm512_undefined_epi32 (),
7661 (__mmask16) -1);
7664 extern __inline __m512i
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7668 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7669 (__v16si) __B,
7670 (__v16si) __W,
7671 (__mmask16) __U);
7674 extern __inline __m512i
7675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7678 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7679 (__v16si) __B,
7680 (__v16si)
7681 _mm512_setzero_si512 (),
7682 (__mmask16) __U);
7685 extern __inline __m512i
7686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7687 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7689 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7690 (__v8di) __B,
7691 (__v8di)
7692 _mm512_undefined_epi32 (),
7693 (__mmask8) -1);
7696 extern __inline __m512i
7697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7700 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7701 (__v8di) __B,
7702 (__v8di) __W, __U);
7705 extern __inline __m512i
7706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7707 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7709 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7710 (__v8di) __B,
7711 (__v8di)
7712 _mm512_setzero_pd (),
7713 __U);
7716 extern __inline __mmask16
7717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7718 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7720 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7721 (__v16si) __B,
7722 (__mmask16) -1);
7725 extern __inline __mmask16
7726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7727 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7729 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7730 (__v16si) __B, __U);
7733 extern __inline __mmask8
7734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7735 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7737 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7738 (__v8di) __B,
7739 (__mmask8) -1);
7742 extern __inline __mmask8
7743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7746 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7749 extern __inline __mmask16
7750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7753 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7754 (__v16si) __B,
7755 (__mmask16) -1);
7758 extern __inline __mmask16
7759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7760 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7762 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7763 (__v16si) __B, __U);
7766 extern __inline __mmask8
7767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7768 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7770 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7771 (__v8di) __B,
7772 (__mmask8) -1);
7775 extern __inline __mmask8
7776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7777 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7779 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7780 (__v8di) __B, __U);
7783 extern __inline __m512
7784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7785 _mm512_abs_ps (__m512 __A)
7787 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7788 _mm512_set1_epi32 (0x7fffffff));
7791 extern __inline __m512
7792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7795 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7796 _mm512_set1_epi32 (0x7fffffff));
7799 extern __inline __m512d
7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801 _mm512_abs_pd (__m512d __A)
7803 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7804 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7807 extern __inline __m512d
7808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7809 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
7811 return (__m512d)
7812 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7813 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7816 extern __inline __m512i
7817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7818 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7820 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7821 (__v16si) __B,
7822 (__v16si)
7823 _mm512_undefined_epi32 (),
7824 (__mmask16) -1);
7827 extern __inline __m512i
7828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7829 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7830 __m512i __B)
7832 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7833 (__v16si) __B,
7834 (__v16si) __W,
7835 (__mmask16) __U);
7838 extern __inline __m512i
7839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7842 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7843 (__v16si) __B,
7844 (__v16si)
7845 _mm512_setzero_si512 (),
7846 (__mmask16) __U);
7849 extern __inline __m512i
7850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7853 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7854 (__v8di) __B,
7855 (__v8di)
7856 _mm512_undefined_epi32 (),
7857 (__mmask8) -1);
7860 extern __inline __m512i
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7864 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7865 (__v8di) __B,
7866 (__v8di) __W,
7867 (__mmask8) __U);
7870 extern __inline __m512i
7871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7872 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7874 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7875 (__v8di) __B,
7876 (__v8di)
7877 _mm512_setzero_si512 (),
7878 (__mmask8) __U);
7881 extern __inline __m512i
7882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7883 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7885 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7886 (__v16si) __B,
7887 (__v16si)
7888 _mm512_undefined_epi32 (),
7889 (__mmask16) -1);
7892 extern __inline __m512i
7893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7895 __m512i __B)
7897 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7898 (__v16si) __B,
7899 (__v16si) __W,
7900 (__mmask16) __U);
7903 extern __inline __m512i
7904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7907 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7908 (__v16si) __B,
7909 (__v16si)
7910 _mm512_setzero_si512 (),
7911 (__mmask16) __U);
7914 extern __inline __m512i
7915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7916 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7918 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7919 (__v8di) __B,
7920 (__v8di)
7921 _mm512_undefined_epi32 (),
7922 (__mmask8) -1);
7925 extern __inline __m512i
7926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7927 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7929 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7930 (__v8di) __B,
7931 (__v8di) __W,
7932 (__mmask8) __U);
7935 extern __inline __m512i
7936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7939 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7940 (__v8di) __B,
7941 (__v8di)
7942 _mm512_setzero_si512 (),
7943 (__mmask8) __U);
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* Scalar SS -> 64-bit integer conversions with an explicit rounding
   mode __R (vcvtss2usi/vcvtss2si and truncating vcvttss2* forms).
   64-bit destinations exist only on x86-64.  */
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
}
#else
#define _mm_cvt_roundss_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))

#define _mm_cvt_roundss_si64(A, B)   \
    ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvt_roundss_i64(A, B)   \
    ((long long)__builtin_ia32_vcvtss2si64(A, B))

#define _mm_cvtt_roundss_u64(A, B)  \
    ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))

#define _mm_cvtt_roundss_i64(A, B)  \
    ((long long)__builtin_ia32_vcvttss2si64(A, B))

#define _mm_cvtt_roundss_si64(A, B) \
    ((long long)__builtin_ia32_vcvttss2si64(A, B))
#endif
#endif
#ifdef __OPTIMIZE__
/* Scalar SS -> 32-bit integer conversions with an explicit rounding
   mode __R (vcvtss2usi/vcvtss2si and truncating vcvttss2* forms).  */
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_i32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
}
#else
#define _mm_cvt_roundss_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))

#define _mm_cvt_roundss_si32(A, B)   \
    ((int)__builtin_ia32_vcvtss2si32(A, B))

#define _mm_cvt_roundss_i32(A, B)   \
    ((int)__builtin_ia32_vcvtss2si32(A, B))

#define _mm_cvtt_roundss_u32(A, B)  \
    ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))

#define _mm_cvtt_roundss_si32(A, B) \
    ((int)__builtin_ia32_vcvttss2si32(A, B))

#define _mm_cvtt_roundss_i32(A, B)  \
    ((int)__builtin_ia32_vcvttss2si32(A, B))
#endif
/* Convert the low DP element of __A to a 64-bit integer with the
   rounding mode given by the immediate __R (x86-64 only); "cvtt"
   variants truncate.  Inline functions when __OPTIMIZE__ is defined,
   macros otherwise.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
}

/* Alias of _mm_cvt_roundsd_si64 under the Intel "i64" spelling.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
}
#else
#define _mm_cvt_roundsd_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))

#define _mm_cvt_roundsd_si64(A, B)   \
    ((long long)__builtin_ia32_vcvtsd2si64(A, B))

#define _mm_cvt_roundsd_i64(A, B)   \
    ((long long)__builtin_ia32_vcvtsd2si64(A, B))

#define _mm_cvtt_roundsd_u64(A, B)  \
    ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))

#define _mm_cvtt_roundsd_si64(A, B) \
    ((long long)__builtin_ia32_vcvttsd2si64(A, B))

#define _mm_cvtt_roundsd_i64(A, B)  \
    ((long long)__builtin_ia32_vcvttsd2si64(A, B))
#endif /* __OPTIMIZE__ */
#endif /* __x86_64__ */
/* Convert the low DP element of __A to a 32-bit integer with the
   rounding mode given by the immediate __R; "cvtt" variants truncate.
   Inline functions when __OPTIMIZE__ is defined, macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
}

/* Alias of _mm_cvt_roundsd_si32 under the Intel "i32" spelling.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
}

extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
}
#else
#define _mm_cvt_roundsd_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))

#define _mm_cvt_roundsd_si32(A, B)   \
    ((int)__builtin_ia32_vcvtsd2si32(A, B))

#define _mm_cvt_roundsd_i32(A, B)   \
    ((int)__builtin_ia32_vcvtsd2si32(A, B))

#define _mm_cvtt_roundsd_u32(A, B)  \
    ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))

#define _mm_cvtt_roundsd_si32(A, B) \
    ((int)__builtin_ia32_vcvttsd2si32(A, B))

#define _mm_cvtt_roundsd_i32(A, B)  \
    ((int)__builtin_ia32_vcvttsd2si32(A, B))
#endif /* __OPTIMIZE__ */
8198 extern __inline __m512d
8199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200 _mm512_movedup_pd (__m512d __A)
8202 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8203 (__v8df)
8204 _mm512_undefined_pd (),
8205 (__mmask8) -1);
8208 extern __inline __m512d
8209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8210 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8212 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8213 (__v8df) __W,
8214 (__mmask8) __U);
8217 extern __inline __m512d
8218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8219 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8221 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8222 (__v8df)
8223 _mm512_setzero_pd (),
8224 (__mmask8) __U);
8227 extern __inline __m512d
8228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8229 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
8231 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8232 (__v8df) __B,
8233 (__v8df)
8234 _mm512_undefined_pd (),
8235 (__mmask8) -1);
8238 extern __inline __m512d
8239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8240 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8242 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8243 (__v8df) __B,
8244 (__v8df) __W,
8245 (__mmask8) __U);
8248 extern __inline __m512d
8249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8250 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8252 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8253 (__v8df) __B,
8254 (__v8df)
8255 _mm512_setzero_pd (),
8256 (__mmask8) __U);
8259 extern __inline __m512d
8260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8261 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
8263 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8264 (__v8df) __B,
8265 (__v8df)
8266 _mm512_undefined_pd (),
8267 (__mmask8) -1);
8270 extern __inline __m512d
8271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8272 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8274 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8275 (__v8df) __B,
8276 (__v8df) __W,
8277 (__mmask8) __U);
8280 extern __inline __m512d
8281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8282 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8284 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8285 (__v8df) __B,
8286 (__v8df)
8287 _mm512_setzero_pd (),
8288 (__mmask8) __U);
8291 extern __inline __m512
8292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8293 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
8295 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8296 (__v16sf) __B,
8297 (__v16sf)
8298 _mm512_undefined_ps (),
8299 (__mmask16) -1);
8302 extern __inline __m512
8303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8306 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8307 (__v16sf) __B,
8308 (__v16sf) __W,
8309 (__mmask16) __U);
8312 extern __inline __m512
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8316 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8317 (__v16sf) __B,
8318 (__v16sf)
8319 _mm512_setzero_ps (),
8320 (__mmask16) __U);
/* Width-changing FP conversions with an explicit rounding/sae operand:
   8 SP -> 8 DP (cvtps2pd), 16 half -> 16 SP (vcvtph2ps) and
   16 SP -> 16 half (vcvtps2ph, where __I also carries the conversion
   immediate).  Inline functions when __OPTIMIZE__, macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_pd (__m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
			    const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_ps (__m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
			    const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}

/* SP -> half conversions.  The _cvt_roundps_ph and _cvtps_ph spellings
   are identical; __I is the vcvtps2ph immediate.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_undefined_si256 (),
						     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_undefined_si256 (),
						     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
			    const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi) __U,
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi) __U,
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_setzero_si256 (),
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_setzero_si256 (),
						     (__mmask16) __W);
}
#else
#define _mm512_cvt_roundps_pd(A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)

#define _mm512_cvt_roundph_ps(A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundps_ph(A, I)						\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I)							\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)					\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I)					\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I)					\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I)						\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
					       (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#endif /* __OPTIMIZE__ */
/* Narrowing conversions with explicit rounding: 8 DP -> 8 SP
   (cvtpd2ps) and the scalar sd<->ss pair.  Inline functions when
   __OPTIMIZE__, macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_undefined_ps (),
						   (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
			    const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U, __R);
}

/* Convert the low DP element of __B to SP, upper elements from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
						 (__v2df) __B,
						 __R);
}

/* Convert the low SP element of __B to DP, upper element from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
						  (__v4sf) __B,
						  __R);
}
#else
#define _mm512_cvt_roundpd_ps(A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)

#define _mm_cvt_roundsd_ss(A, B, C) \
    (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)

#define _mm_cvt_roundss_sd(A, B, C) \
    (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
#endif /* __OPTIMIZE__ */
8552 extern __inline void
8553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8554 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8556 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8559 extern __inline void
8560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8561 _mm512_stream_ps (float *__P, __m512 __A)
8563 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8566 extern __inline void
8567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8568 _mm512_stream_pd (double *__P, __m512d __A)
8570 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8573 extern __inline __m512i
8574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8575 _mm512_stream_load_si512 (void *__P)
8577 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
/* Constants for mantissa extraction: normalization-interval selector
   (low two bits of the getmant immediate).  */
typedef enum
{
  _MM_MANT_NORM_1_2,		/* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,		/* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,		/* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5		/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign-control selector (next two bits of the getmant immediate).  */
typedef enum
{
  _MM_MANT_SIGN_src,		/* sign = sign(SRC)            */
  _MM_MANT_SIGN_zero,		/* sign = 0                    */
  _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8596 #ifdef __OPTIMIZE__
8597 extern __inline __m128
8598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8599 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8601 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8602 (__v4sf) __B,
8603 __R);
8606 extern __inline __m128
8607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8609 __m128 __B, const int __R)
8611 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8612 (__v4sf) __B,
8613 (__v4sf) __W,
8614 (__mmask8) __U, __R);
8617 extern __inline __m128
8618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8619 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8620 const int __R)
8622 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8623 (__v4sf) __B,
8624 (__v4sf)
8625 _mm_setzero_ps (),
8626 (__mmask8) __U, __R);
8629 extern __inline __m128d
8630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8633 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8634 (__v2df) __B,
8635 __R);
8638 extern __inline __m128d
8639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8640 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8641 __m128d __B, const int __R)
8643 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8644 (__v2df) __B,
8645 (__v2df) __W,
8646 (__mmask8) __U, __R);
8649 extern __inline __m128d
8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8652 const int __R)
8654 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8655 (__v2df) __B,
8656 (__v2df)
8657 _mm_setzero_pd (),
8658 (__mmask8) __U, __R);
8661 extern __inline __m512
8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663 _mm512_getexp_round_ps (__m512 __A, const int __R)
8665 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8666 (__v16sf)
8667 _mm512_undefined_ps (),
8668 (__mmask16) -1, __R);
8671 extern __inline __m512
8672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8674 const int __R)
8676 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8677 (__v16sf) __W,
8678 (__mmask16) __U, __R);
8681 extern __inline __m512
8682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8683 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8685 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8686 (__v16sf)
8687 _mm512_setzero_ps (),
8688 (__mmask16) __U, __R);
8691 extern __inline __m512d
8692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8693 _mm512_getexp_round_pd (__m512d __A, const int __R)
8695 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8696 (__v8df)
8697 _mm512_undefined_pd (),
8698 (__mmask8) -1, __R);
8701 extern __inline __m512d
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8704 const int __R)
8706 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8707 (__v8df) __W,
8708 (__mmask8) __U, __R);
8711 extern __inline __m512d
8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8713 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8715 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8716 (__v8df)
8717 _mm512_setzero_pd (),
8718 (__mmask8) __U, __R);
8721 extern __inline __m512d
8722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8723 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8724 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8726 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8727 (__C << 2) | __B,
8728 _mm512_undefined_pd (),
8729 (__mmask8) -1, __R);
8732 extern __inline __m512d
8733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8734 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8735 _MM_MANTISSA_NORM_ENUM __B,
8736 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8738 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8739 (__C << 2) | __B,
8740 (__v8df) __W, __U,
8741 __R);
8744 extern __inline __m512d
8745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8746 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8747 _MM_MANTISSA_NORM_ENUM __B,
8748 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8750 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8751 (__C << 2) | __B,
8752 (__v8df)
8753 _mm512_setzero_pd (),
8754 __U, __R);
8757 extern __inline __m512
8758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8759 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8760 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8762 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8763 (__C << 2) | __B,
8764 _mm512_undefined_ps (),
8765 (__mmask16) -1, __R);
8768 extern __inline __m512
8769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8770 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8771 _MM_MANTISSA_NORM_ENUM __B,
8772 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8774 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8775 (__C << 2) | __B,
8776 (__v16sf) __W, __U,
8777 __R);
8780 extern __inline __m512
8781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8782 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8783 _MM_MANTISSA_NORM_ENUM __B,
8784 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8786 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8787 (__C << 2) | __B,
8788 (__v16sf)
8789 _mm512_setzero_ps (),
8790 __U, __R);
8793 extern __inline __m128d
8794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8795 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8796 _MM_MANTISSA_NORM_ENUM __C,
8797 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8799 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8800 (__v2df) __B,
8801 (__D << 2) | __C,
8802 __R);
8805 extern __inline __m128d
8806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8807 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8808 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8809 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8811 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8812 (__v2df) __B,
8813 (__D << 2) | __C,
8814 (__v2df) __W,
8815 __U, __R);
8818 extern __inline __m128d
8819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8821 _MM_MANTISSA_NORM_ENUM __C,
8822 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8824 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8825 (__v2df) __B,
8826 (__D << 2) | __C,
8827 (__v2df)
8828 _mm_setzero_pd(),
8829 __U, __R);
8832 extern __inline __m128
8833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8834 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8835 _MM_MANTISSA_NORM_ENUM __C,
8836 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8838 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8839 (__v4sf) __B,
8840 (__D << 2) | __C,
8841 __R);
8844 extern __inline __m128
8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8847 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8848 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8850 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8851 (__v4sf) __B,
8852 (__D << 2) | __C,
8853 (__v4sf) __W,
8854 __U, __R);
8857 extern __inline __m128
8858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8859 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8860 _MM_MANTISSA_NORM_ENUM __C,
8861 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8863 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8864 (__v4sf) __B,
8865 (__D << 2) | __C,
8866 (__v4sf)
8867 _mm_setzero_ps(),
8868 __U, __R);
8871 #else
8872 #define _mm512_getmant_round_pd(X, B, C, R) \
8873 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8874 (int)(((C)<<2) | (B)), \
8875 (__v8df)(__m512d)_mm512_undefined_pd(), \
8876 (__mmask8)-1,\
8877 (R)))
8879 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8880 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8881 (int)(((C)<<2) | (B)), \
8882 (__v8df)(__m512d)(W), \
8883 (__mmask8)(U),\
8884 (R)))
8886 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8887 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8888 (int)(((C)<<2) | (B)), \
8889 (__v8df)(__m512d)_mm512_setzero_pd(), \
8890 (__mmask8)(U),\
8891 (R)))
8892 #define _mm512_getmant_round_ps(X, B, C, R) \
8893 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8894 (int)(((C)<<2) | (B)), \
8895 (__v16sf)(__m512)_mm512_undefined_ps(), \
8896 (__mmask16)-1,\
8897 (R)))
8899 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8900 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8901 (int)(((C)<<2) | (B)), \
8902 (__v16sf)(__m512)(W), \
8903 (__mmask16)(U),\
8904 (R)))
8906 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8907 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8908 (int)(((C)<<2) | (B)), \
8909 (__v16sf)(__m512)_mm512_setzero_ps(), \
8910 (__mmask16)(U),\
8911 (R)))
8912 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8913 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8914 (__v2df)(__m128d)(Y), \
8915 (int)(((D)<<2) | (C)), \
8916 (R)))
8918 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
8919 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8920 (__v2df)(__m128d)(Y), \
8921 (int)(((D)<<2) | (C)), \
8922 (__v2df)(__m128d)(W), \
8923 (__mmask8)(U),\
8924 (R)))
8926 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
8927 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8928 (__v2df)(__m128d)(Y), \
8929 (int)(((D)<<2) | (C)), \
8930 (__v2df)(__m128d)_mm_setzero_pd(), \
8931 (__mmask8)(U),\
8932 (R)))
8934 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8935 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8936 (__v4sf)(__m128)(Y), \
8937 (int)(((D)<<2) | (C)), \
8938 (R)))
8940 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
8941 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
8942 (__v4sf)(__m128)(Y), \
8943 (int)(((D)<<2) | (C)), \
8944 (__v4sf)(__m128)(W), \
8945 (__mmask8)(U),\
8946 (R)))
8948 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
8949 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
8950 (__v4sf)(__m128)(Y), \
8951 (int)(((D)<<2) | (C)), \
8952 (__v4sf)(__m128)_mm_setzero_ps(), \
8953 (__mmask8)(U),\
8954 (R)))
8956 #define _mm_getexp_round_ss(A, B, R) \
8957 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8959 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
8960 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
8962 #define _mm_maskz_getexp_round_ss(U, A, B, C) \
8963 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
8965 #define _mm_getexp_round_sd(A, B, R) \
8966 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8968 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
8969 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
8971 #define _mm_maskz_getexp_round_sd(U, A, B, C) \
8972 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
8975 #define _mm512_getexp_round_ps(A, R) \
8976 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8977 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8979 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8980 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8981 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8983 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8984 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8985 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8987 #define _mm512_getexp_round_pd(A, R) \
8988 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8989 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8991 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8992 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8993 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8995 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8996 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8997 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8998 #endif
#ifdef __OPTIMIZE__
/* Round packed single-precision elements of __A to the number of
   fraction bits selected by __imm, with rounding mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  -1, __R);
}

/* Masked form: elements with a zero mask bit are copied from __A.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
				 const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
						  (__v16sf) __A,
						  (__mmask16) __B, __R);
}

/* Zero-masked form: elements with a zero mask bit become 0.0.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
				  const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __A, __R);
}

/* Double-precision variants of the three forms above.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
				 __m512d __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
						   (__v8df) __A,
						   (__mmask8) __B, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
				  const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __A, __R);
}

/* Scalar forms: round the low element, upper elements come from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
						   (__v4sf) __B, __imm, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
			 const int __R)
{
  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
						    (__v2df) __B, __imm, __R);
}

#else
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B), R))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(),\
					    (__mmask16)(A), R))
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B), R))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(),\
					     (__mmask8)(A), R))
#define _mm_roundscale_round_ss(A, B, C, R)				\
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
					     (__v4sf)(__m128)(B), (int)(C), R))
#define _mm_roundscale_round_sd(A, B, C, R)				\
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
					      (__v2df)(__m128d)(B), (int)(C), R))
#endif
9117 extern __inline __m512
9118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9119 _mm512_floor_ps (__m512 __A)
9121 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9122 _MM_FROUND_FLOOR,
9123 (__v16sf) __A, -1,
9124 _MM_FROUND_CUR_DIRECTION);
9127 extern __inline __m512d
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm512_floor_pd (__m512d __A)
9131 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9132 _MM_FROUND_FLOOR,
9133 (__v8df) __A, -1,
9134 _MM_FROUND_CUR_DIRECTION);
9137 extern __inline __m512
9138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9139 _mm512_ceil_ps (__m512 __A)
9141 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9142 _MM_FROUND_CEIL,
9143 (__v16sf) __A, -1,
9144 _MM_FROUND_CUR_DIRECTION);
9147 extern __inline __m512d
9148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9149 _mm512_ceil_pd (__m512d __A)
9151 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9152 _MM_FROUND_CEIL,
9153 (__v8df) __A, -1,
9154 _MM_FROUND_CUR_DIRECTION);
9157 extern __inline __m512
9158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9159 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9161 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9162 _MM_FROUND_FLOOR,
9163 (__v16sf) __W, __U,
9164 _MM_FROUND_CUR_DIRECTION);
9167 extern __inline __m512d
9168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9169 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9171 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9172 _MM_FROUND_FLOOR,
9173 (__v8df) __W, __U,
9174 _MM_FROUND_CUR_DIRECTION);
9177 extern __inline __m512
9178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9181 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9182 _MM_FROUND_CEIL,
9183 (__v16sf) __W, __U,
9184 _MM_FROUND_CUR_DIRECTION);
9187 extern __inline __m512d
9188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9189 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9191 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9192 _MM_FROUND_CEIL,
9193 (__v8df) __W, __U,
9194 _MM_FROUND_CUR_DIRECTION);
#ifdef __OPTIMIZE__
/* Concatenate __A:__B and shift right by __imm 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			  __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* 64-bit-element variants.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			  __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_alignr_epi32(X, Y, C)					    \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),	    \
    (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)				\
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),	\
    (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),		\
    (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)				\
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),	\
    (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C)					\
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),	\
    (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
    (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)				\
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),	\
    (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)				\
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),	\
    (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
9296 extern __inline __mmask16
9297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9298 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9300 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9301 (__v16si) __B,
9302 (__mmask16) -1);
9305 extern __inline __mmask16
9306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9309 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9310 (__v16si) __B, __U);
9313 extern __inline __mmask8
9314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9315 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9317 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9318 (__v8di) __B, __U);
9321 extern __inline __mmask8
9322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9323 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9325 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9326 (__v8di) __B,
9327 (__mmask8) -1);
9330 extern __inline __mmask16
9331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9334 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9335 (__v16si) __B,
9336 (__mmask16) -1);
9339 extern __inline __mmask16
9340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9341 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9343 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9344 (__v16si) __B, __U);
9347 extern __inline __mmask8
9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9351 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9352 (__v8di) __B, __U);
9355 extern __inline __mmask8
9356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9357 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9359 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9360 (__v8di) __B,
9361 (__mmask8) -1);
9364 extern __inline __mmask16
9365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9366 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9368 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9369 (__v16si) __Y, 5,
9370 (__mmask16) -1);
9373 extern __inline __mmask16
9374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9375 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9377 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9378 (__v16si) __Y, 5,
9379 (__mmask16) __M);
9382 extern __inline __mmask16
9383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9384 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9386 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9387 (__v16si) __Y, 5,
9388 (__mmask16) __M);
9391 extern __inline __mmask16
9392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9395 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9396 (__v16si) __Y, 5,
9397 (__mmask16) -1);
9400 extern __inline __mmask8
9401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9402 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9404 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9405 (__v8di) __Y, 5,
9406 (__mmask8) __M);
9409 extern __inline __mmask8
9410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9411 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9413 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9414 (__v8di) __Y, 5,
9415 (__mmask8) -1);
9418 extern __inline __mmask8
9419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9420 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9422 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9423 (__v8di) __Y, 5,
9424 (__mmask8) __M);
9427 extern __inline __mmask8
9428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9429 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9431 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9432 (__v8di) __Y, 5,
9433 (__mmask8) -1);
9436 extern __inline __mmask16
9437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9440 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9441 (__v16si) __Y, 2,
9442 (__mmask16) __M);
9445 extern __inline __mmask16
9446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9447 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9449 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9450 (__v16si) __Y, 2,
9451 (__mmask16) -1);
9454 extern __inline __mmask16
9455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9456 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9458 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9459 (__v16si) __Y, 2,
9460 (__mmask16) __M);
9463 extern __inline __mmask16
9464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9465 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9467 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9468 (__v16si) __Y, 2,
9469 (__mmask16) -1);
9472 extern __inline __mmask8
9473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9474 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9476 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9477 (__v8di) __Y, 2,
9478 (__mmask8) __M);
9481 extern __inline __mmask8
9482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9483 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9485 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9486 (__v8di) __Y, 2,
9487 (__mmask8) -1);
9490 extern __inline __mmask8
9491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9492 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9494 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9495 (__v8di) __Y, 2,
9496 (__mmask8) __M);
9499 extern __inline __mmask8
9500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9501 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9503 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9504 (__v8di) __Y, 2,
9505 (__mmask8) -1);
9508 extern __inline __mmask16
9509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9510 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9512 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9513 (__v16si) __Y, 1,
9514 (__mmask16) __M);
9517 extern __inline __mmask16
9518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9519 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9521 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9522 (__v16si) __Y, 1,
9523 (__mmask16) -1);
9526 extern __inline __mmask16
9527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9528 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9530 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9531 (__v16si) __Y, 1,
9532 (__mmask16) __M);
9535 extern __inline __mmask16
9536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9537 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9539 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9540 (__v16si) __Y, 1,
9541 (__mmask16) -1);
9544 extern __inline __mmask8
9545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9546 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9548 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9549 (__v8di) __Y, 1,
9550 (__mmask8) __M);
9553 extern __inline __mmask8
9554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9555 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9557 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9558 (__v8di) __Y, 1,
9559 (__mmask8) -1);
9562 extern __inline __mmask8
9563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9564 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9566 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9567 (__v8di) __Y, 1,
9568 (__mmask8) __M);
9571 extern __inline __mmask8
9572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9575 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9576 (__v8di) __Y, 1,
9577 (__mmask8) -1);
9580 extern __inline __mmask16
9581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9582 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9584 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9585 (__v16si) __Y, 4,
9586 (__mmask16) -1);
9589 extern __inline __mmask16
9590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9591 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9593 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9594 (__v16si) __Y, 4,
9595 (__mmask16) __M);
9598 extern __inline __mmask16
9599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9600 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9602 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9603 (__v16si) __Y, 4,
9604 (__mmask16) __M);
9607 extern __inline __mmask16
9608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9609 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9611 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9612 (__v16si) __Y, 4,
9613 (__mmask16) -1);
9616 extern __inline __mmask8
9617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9618 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9620 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9621 (__v8di) __Y, 4,
9622 (__mmask8) __M);
9625 extern __inline __mmask8
9626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9627 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9629 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9630 (__v8di) __Y, 4,
9631 (__mmask8) -1);
9634 extern __inline __mmask8
9635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9636 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9638 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9639 (__v8di) __Y, 4,
9640 (__mmask8) __M);
9643 extern __inline __mmask8
9644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9645 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9647 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9648 (__v8di) __Y, 4,
9649 (__mmask8) -1);
/* Comparison predicate values for the immediate argument of the
   _mm512_cmp_ep{i,u}{32,64}_mask intrinsics.  */
#define _MM_CMPINT_EQ	    0x0
#define _MM_CMPINT_LT	    0x1
#define _MM_CMPINT_LE	    0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE	    0x4
#define _MM_CMPINT_NLT	    0x5
#define _MM_CMPINT_GE	    0x5	/* Alias of NLT.  */
#define _MM_CMPINT_NLE	    0x6
#define _MM_CMPINT_GT	    0x6	/* Alias of NLE.  */
#ifdef __OPTIMIZE__
/* Shift a 16-bit mask register left/right by __B bit positions.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
{
  return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
						(__mmask8) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
{
  return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
						(__mmask8) __B);
}

/* General comparisons with an explicit predicate __P (_MM_CMPINT_*).  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, __P,
						 (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, __P,
						  (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, __P,
						  (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, __P,
						   (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
			  const int __R)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
						  (__v8df) __Y, __P,
						  (__mmask8) -1, __R);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
						   (__v16sf) __Y, __P,
						   (__mmask16) -1, __R);
}

/* Masked forms: result bits are zeroed where __U is zero.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, __P,
						 (__mmask8) __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, __P,
						  (__mmask16) __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, __P,
						  (__mmask8) __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, __P,
						   (__mmask16) __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
			       const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
						  (__v8df) __Y, __P,
						  (__mmask8) __U, __R);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
			       const int __P, const int __R)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
						   (__v16sf) __Y, __P,
						   (__mmask16) __U, __R);
}

/* Scalar comparisons of the low element only.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
					       (__v2df) __Y, __P,
					       (__mmask8) -1, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
			    const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
					       (__v2df) __Y, __P,
					       (__mmask8) __M, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
					       (__v4sf) __Y, __P,
					       (__mmask8) -1, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
			    const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
					       (__v4sf) __Y, __P,
					       (__mmask8) __M, __R);
}

#else
/* Macro forms.  Note: every mask argument M is parenthesized and cast
   to the proper __mmask type; the scalar sd/ss mask macros previously
   passed a bare (M), inconsistent with the vector macros.  */
#define _kshiftli_mask16(X, Y)						\
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))

#define _kshiftri_mask16(X, Y)						\
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))

#define _mm512_cmp_epi64_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),	\
					   (__v8di)(__m512i)(Y), (int)(P),\
					   (__mmask8)-1))

#define _mm512_cmp_epi32_mask(X, Y, P)					\
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),	\
					    (__v16si)(__m512i)(Y), (int)(P), \
					    (__mmask16)-1))

#define _mm512_cmp_epu64_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),	\
					    (__v8di)(__m512i)(Y), (int)(P),\
					    (__mmask8)-1))

#define _mm512_cmp_epu32_mask(X, Y, P)					\
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),	\
					     (__v16si)(__m512i)(Y), (int)(P), \
					     (__mmask16)-1))

#define _mm512_cmp_round_pd_mask(X, Y, P, R)				\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y), (int)(P),\
					    (__mmask8)-1, R))

#define _mm512_cmp_round_ps_mask(X, Y, P, R)				\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y), (int)(P),\
					     (__mmask16)-1, R))

#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),	\
					   (__v8di)(__m512i)(Y), (int)(P),\
					   (__mmask8)(M)))

#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)				\
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),	\
					    (__v16si)(__m512i)(Y), (int)(P), \
					    (__mmask16)(M)))

#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),	\
					    (__v8di)(__m512i)(Y), (int)(P),\
					    (__mmask8)(M)))

#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)				\
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),	\
					     (__v16si)(__m512i)(Y), (int)(P), \
					     (__mmask16)(M)))

#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)			\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y), (int)(P),\
					    (__mmask8)(M), R))

#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)			\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y), (int)(P),\
					     (__mmask16)(M), R))

#define _mm_cmp_round_sd_mask(X, Y, P, R)				\
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
					 (__v2df)(__m128d)(Y), (int)(P),\
					 (__mmask8)-1, R))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)			\
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
					 (__v2df)(__m128d)(Y), (int)(P),\
					 (__mmask8)(M), R))

#define _mm_cmp_round_ss_mask(X, Y, P, R)				\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y), (int)(P), \
					 (__mmask8)-1, R))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)			\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y), (int)(P), \
					 (__mmask8)(M), R))
#endif
9920 #ifdef __OPTIMIZE__
9921 extern __inline __m512
9922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9923 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9925 __m512 __v1_old = _mm512_undefined_ps ();
9926 __mmask16 __mask = 0xFFFF;
9928 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9929 __addr,
9930 (__v16si) __index,
9931 __mask, __scale);
9934 extern __inline __m512
9935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9937 __m512i __index, void const *__addr, int __scale)
9939 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9940 __addr,
9941 (__v16si) __index,
9942 __mask, __scale);
9945 extern __inline __m512d
9946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9947 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9949 __m512d __v1_old = _mm512_undefined_pd ();
9950 __mmask8 __mask = 0xFF;
9952 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9953 __addr,
9954 (__v8si) __index, __mask,
9955 __scale);
9958 extern __inline __m512d
9959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9961 __m256i __index, void const *__addr, int __scale)
9963 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9964 __addr,
9965 (__v8si) __index,
9966 __mask, __scale);
9969 extern __inline __m256
9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9973 __m256 __v1_old = _mm256_undefined_ps ();
9974 __mmask8 __mask = 0xFF;
9976 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9977 __addr,
9978 (__v8di) __index, __mask,
9979 __scale);
9982 extern __inline __m256
9983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9984 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9985 __m512i __index, void const *__addr, int __scale)
9987 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9988 __addr,
9989 (__v8di) __index,
9990 __mask, __scale);
9993 extern __inline __m512d
9994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9995 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9997 __m512d __v1_old = _mm512_undefined_pd ();
9998 __mmask8 __mask = 0xFF;
10000 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10001 __addr,
10002 (__v8di) __index, __mask,
10003 __scale);
10006 extern __inline __m512d
10007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10008 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
10009 __m512i __index, void const *__addr, int __scale)
10011 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
10012 __addr,
10013 (__v8di) __index,
10014 __mask, __scale);
10017 extern __inline __m512i
10018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10019 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
10021 __m512i __v1_old = _mm512_undefined_epi32 ();
10022 __mmask16 __mask = 0xFFFF;
10024 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10025 __addr,
10026 (__v16si) __index,
10027 __mask, __scale);
10030 extern __inline __m512i
10031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10032 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
10033 __m512i __index, void const *__addr, int __scale)
10035 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
10036 __addr,
10037 (__v16si) __index,
10038 __mask, __scale);
10041 extern __inline __m512i
10042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10043 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
10045 __m512i __v1_old = _mm512_undefined_epi32 ();
10046 __mmask8 __mask = 0xFF;
10048 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10049 __addr,
10050 (__v8si) __index, __mask,
10051 __scale);
10054 extern __inline __m512i
10055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10056 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
10057 __m256i __index, void const *__addr,
10058 int __scale)
10060 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
10061 __addr,
10062 (__v8si) __index,
10063 __mask, __scale);
10066 extern __inline __m256i
10067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10068 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
10070 __m256i __v1_old = _mm256_undefined_si256 ();
10071 __mmask8 __mask = 0xFF;
10073 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10074 __addr,
10075 (__v8di) __index,
10076 __mask, __scale);
10079 extern __inline __m256i
10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10082 __m512i __index, void const *__addr, int __scale)
10084 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10085 __addr,
10086 (__v8di) __index,
10087 __mask, __scale);
10090 extern __inline __m512i
10091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10092 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
10094 __m512i __v1_old = _mm512_undefined_epi32 ();
10095 __mmask8 __mask = 0xFF;
10097 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10098 __addr,
10099 (__v8di) __index, __mask,
10100 __scale);
10103 extern __inline __m512i
10104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
10106 __m512i __index, void const *__addr,
10107 int __scale)
10109 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10110 __addr,
10111 (__v8di) __index,
10112 __mask, __scale);
10115 extern __inline void
10116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10117 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
10119 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10120 (__v16si) __index, (__v16sf) __v1, __scale);
10123 extern __inline void
10124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10125 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
10126 __m512i __index, __m512 __v1, int __scale)
10128 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10129 (__v16sf) __v1, __scale);
10132 extern __inline void
10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
10135 int __scale)
10137 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10138 (__v8si) __index, (__v8df) __v1, __scale);
10141 extern __inline void
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
10144 __m256i __index, __m512d __v1, int __scale)
10146 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10147 (__v8df) __v1, __scale);
10150 extern __inline void
10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
10154 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10155 (__v8di) __index, (__v8sf) __v1, __scale);
10158 extern __inline void
10159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10160 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
10161 __m512i __index, __m256 __v1, int __scale)
10163 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10164 (__v8sf) __v1, __scale);
10167 extern __inline void
10168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10169 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
10170 int __scale)
10172 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10173 (__v8di) __index, (__v8df) __v1, __scale);
10176 extern __inline void
10177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10178 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
10179 __m512i __index, __m512d __v1, int __scale)
10181 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10182 (__v8df) __v1, __scale);
10185 extern __inline void
10186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10187 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
10188 __m512i __v1, int __scale)
10190 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10191 (__v16si) __index, (__v16si) __v1, __scale);
10194 extern __inline void
10195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10196 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
10197 __m512i __index, __m512i __v1, int __scale)
10199 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10200 (__v16si) __v1, __scale);
10203 extern __inline void
10204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
10206 __m512i __v1, int __scale)
10208 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10209 (__v8si) __index, (__v8di) __v1, __scale);
10212 extern __inline void
10213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10214 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
10215 __m256i __index, __m512i __v1, int __scale)
10217 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10218 (__v8di) __v1, __scale);
10221 extern __inline void
10222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10223 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
10224 __m256i __v1, int __scale)
10226 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10227 (__v8di) __index, (__v8si) __v1, __scale);
10230 extern __inline void
10231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10232 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
10233 __m512i __index, __m256i __v1, int __scale)
10235 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10236 (__v8si) __v1, __scale);
10239 extern __inline void
10240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
10242 __m512i __v1, int __scale)
10244 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10245 (__v8di) __index, (__v8di) __v1, __scale);
10248 extern __inline void
10249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10250 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
10251 __m512i __index, __m512i __v1, int __scale)
10253 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10254 (__v8di) __v1, __scale);
10256 #else
10257 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
10258 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
10259 (void const *)ADDR, \
10260 (__v16si)(__m512i)INDEX, \
10261 (__mmask16)0xFFFF, (int)SCALE)
10263 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
10264 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
10265 (void const *)ADDR, \
10266 (__v16si)(__m512i)INDEX, \
10267 (__mmask16)MASK, (int)SCALE)
10269 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
10270 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
10271 (void const *)ADDR, \
10272 (__v8si)(__m256i)INDEX, \
10273 (__mmask8)0xFF, (int)SCALE)
10275 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
10276 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
10277 (void const *)ADDR, \
10278 (__v8si)(__m256i)INDEX, \
10279 (__mmask8)MASK, (int)SCALE)
10281 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
10282 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
10283 (void const *)ADDR, \
10284 (__v8di)(__m512i)INDEX, \
10285 (__mmask8)0xFF, (int)SCALE)
10287 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
10288 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
10289 (void const *)ADDR, \
10290 (__v8di)(__m512i)INDEX, \
10291 (__mmask8)MASK, (int)SCALE)
10293 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
10294 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
10295 (void const *)ADDR, \
10296 (__v8di)(__m512i)INDEX, \
10297 (__mmask8)0xFF, (int)SCALE)
10299 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
10300 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
10301 (void const *)ADDR, \
10302 (__v8di)(__m512i)INDEX, \
10303 (__mmask8)MASK, (int)SCALE)
10305 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
10306 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
10307 (void const *)ADDR, \
10308 (__v16si)(__m512i)INDEX, \
10309 (__mmask16)0xFFFF, (int)SCALE)
10311 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
10312 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
10313 (void const *)ADDR, \
10314 (__v16si)(__m512i)INDEX, \
10315 (__mmask16)MASK, (int)SCALE)
10317 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
10318 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
10319 (void const *)ADDR, \
10320 (__v8si)(__m256i)INDEX, \
10321 (__mmask8)0xFF, (int)SCALE)
10323 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
10324 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
10325 (void const *)ADDR, \
10326 (__v8si)(__m256i)INDEX, \
10327 (__mmask8)MASK, (int)SCALE)
10329 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
10330 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
10331 (void const *)ADDR, \
10332 (__v8di)(__m512i)INDEX, \
10333 (__mmask8)0xFF, (int)SCALE)
10335 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
10336 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
10337 (void const *)ADDR, \
10338 (__v8di)(__m512i)INDEX, \
10339 (__mmask8)MASK, (int)SCALE)
10341 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
10342 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
10343 (void const *)ADDR, \
10344 (__v8di)(__m512i)INDEX, \
10345 (__mmask8)0xFF, (int)SCALE)
10347 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
10348 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
10349 (void const *)ADDR, \
10350 (__v8di)(__m512i)INDEX, \
10351 (__mmask8)MASK, (int)SCALE)
10353 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
10354 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF, \
10355 (__v16si)(__m512i)INDEX, \
10356 (__v16sf)(__m512)V1, (int)SCALE)
10358 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
10359 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK, \
10360 (__v16si)(__m512i)INDEX, \
10361 (__v16sf)(__m512)V1, (int)SCALE)
10363 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
10364 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF, \
10365 (__v8si)(__m256i)INDEX, \
10366 (__v8df)(__m512d)V1, (int)SCALE)
10368 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
10369 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK, \
10370 (__v8si)(__m256i)INDEX, \
10371 (__v8df)(__m512d)V1, (int)SCALE)
10373 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
10374 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF, \
10375 (__v8di)(__m512i)INDEX, \
10376 (__v8sf)(__m256)V1, (int)SCALE)
10378 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
10379 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask16)MASK, \
10380 (__v8di)(__m512i)INDEX, \
10381 (__v8sf)(__m256)V1, (int)SCALE)
10383 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
10384 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF, \
10385 (__v8di)(__m512i)INDEX, \
10386 (__v8df)(__m512d)V1, (int)SCALE)
10388 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
10389 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK, \
10390 (__v8di)(__m512i)INDEX, \
10391 (__v8df)(__m512d)V1, (int)SCALE)
10393 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
10394 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF, \
10395 (__v16si)(__m512i)INDEX, \
10396 (__v16si)(__m512i)V1, (int)SCALE)
10398 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
10399 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK, \
10400 (__v16si)(__m512i)INDEX, \
10401 (__v16si)(__m512i)V1, (int)SCALE)
10403 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
10404 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF, \
10405 (__v8si)(__m256i)INDEX, \
10406 (__v8di)(__m512i)V1, (int)SCALE)
10408 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
10409 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK, \
10410 (__v8si)(__m256i)INDEX, \
10411 (__v8di)(__m512i)V1, (int)SCALE)
10413 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
10414 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF, \
10415 (__v8di)(__m512i)INDEX, \
10416 (__v8si)(__m256i)V1, (int)SCALE)
10418 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
10419 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK, \
10420 (__v8di)(__m512i)INDEX, \
10421 (__v8si)(__m256i)V1, (int)SCALE)
10423 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
10424 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF, \
10425 (__v8di)(__m512i)INDEX, \
10426 (__v8di)(__m512i)V1, (int)SCALE)
10428 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
10429 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK, \
10430 (__v8di)(__m512i)INDEX, \
10431 (__v8di)(__m512i)V1, (int)SCALE)
10432 #endif
10434 extern __inline __m512d
10435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10436 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10438 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10439 (__v8df) __W,
10440 (__mmask8) __U);
10443 extern __inline __m512d
10444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10445 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10447 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10448 (__v8df)
10449 _mm512_setzero_pd (),
10450 (__mmask8) __U);
10453 extern __inline void
10454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10455 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10457 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10458 (__mmask8) __U);
10461 extern __inline __m512
10462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10463 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10465 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10466 (__v16sf) __W,
10467 (__mmask16) __U);
10470 extern __inline __m512
10471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10472 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10474 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10475 (__v16sf)
10476 _mm512_setzero_ps (),
10477 (__mmask16) __U);
10480 extern __inline void
10481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10482 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10484 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10485 (__mmask16) __U);
10488 extern __inline __m512i
10489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10492 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10493 (__v8di) __W,
10494 (__mmask8) __U);
10497 extern __inline __m512i
10498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10501 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10502 (__v8di)
10503 _mm512_setzero_si512 (),
10504 (__mmask8) __U);
10507 extern __inline void
10508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10511 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10512 (__mmask8) __U);
10515 extern __inline __m512i
10516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10517 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10519 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10520 (__v16si) __W,
10521 (__mmask16) __U);
10524 extern __inline __m512i
10525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10526 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10528 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10529 (__v16si)
10530 _mm512_setzero_si512 (),
10531 (__mmask16) __U);
10534 extern __inline void
10535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10536 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10538 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10539 (__mmask16) __U);
10542 extern __inline __m512d
10543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10544 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10546 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10547 (__v8df) __W,
10548 (__mmask8) __U);
10551 extern __inline __m512d
10552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10553 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10555 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10556 (__v8df)
10557 _mm512_setzero_pd (),
10558 (__mmask8) __U);
10561 extern __inline __m512d
10562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10563 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10565 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10566 (__v8df) __W,
10567 (__mmask8) __U);
10570 extern __inline __m512d
10571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10572 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10574 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10575 (__v8df)
10576 _mm512_setzero_pd (),
10577 (__mmask8) __U);
10580 extern __inline __m512
10581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10582 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10584 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10585 (__v16sf) __W,
10586 (__mmask16) __U);
10589 extern __inline __m512
10590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10593 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10594 (__v16sf)
10595 _mm512_setzero_ps (),
10596 (__mmask16) __U);
10599 extern __inline __m512
10600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10603 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10604 (__v16sf) __W,
10605 (__mmask16) __U);
10608 extern __inline __m512
10609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10610 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10612 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10613 (__v16sf)
10614 _mm512_setzero_ps (),
10615 (__mmask16) __U);
10618 extern __inline __m512i
10619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10622 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10623 (__v8di) __W,
10624 (__mmask8) __U);
10627 extern __inline __m512i
10628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10629 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10631 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10632 (__v8di)
10633 _mm512_setzero_si512 (),
10634 (__mmask8) __U);
10637 extern __inline __m512i
10638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10639 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10641 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10642 (__v8di) __W,
10643 (__mmask8) __U);
10646 extern __inline __m512i
10647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10648 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10650 return (__m512i)
10651 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10652 (__v8di)
10653 _mm512_setzero_si512 (),
10654 (__mmask8) __U);
10657 extern __inline __m512i
10658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10659 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10661 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10662 (__v16si) __W,
10663 (__mmask16) __U);
10666 extern __inline __m512i
10667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10670 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10671 (__v16si)
10672 _mm512_setzero_si512 (),
10673 (__mmask16) __U);
10676 extern __inline __m512i
10677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10678 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10680 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10681 (__v16si) __W,
10682 (__mmask16) __U);
10685 extern __inline __m512i
10686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10687 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10689 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10690 (__v16si)
10691 _mm512_setzero_si512
10692 (), (__mmask16) __U);
/* Mask arithmetic operations.  The *_mask16 names are the generic
   aliases for the original _mm512_k* 16-bit mask operations.  */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor
10703 extern __inline unsigned char
10704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10705 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10707 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10708 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10711 extern __inline unsigned char
10712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10715 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10716 (__mmask16) __B);
10719 extern __inline unsigned char
10720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10721 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10723 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10724 (__mmask16) __B);
10727 extern __inline unsigned int
10728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729 _cvtmask16_u32 (__mmask16 __A)
10731 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
10734 extern __inline __mmask16
10735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10736 _cvtu32_mask16 (unsigned int __A)
10738 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
10741 extern __inline __mmask16
10742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10743 _load_mask16 (__mmask16 *__A)
10745 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10748 extern __inline void
10749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10750 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10752 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10755 extern __inline __mmask16
10756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757 _mm512_kand (__mmask16 __A, __mmask16 __B)
10759 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10762 extern __inline __mmask16
10763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10764 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10766 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10767 (__mmask16) __B);
10770 extern __inline __mmask16
10771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772 _mm512_kor (__mmask16 __A, __mmask16 __B)
10774 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10777 extern __inline int
10778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10779 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10781 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10782 (__mmask16) __B);
10785 extern __inline int
10786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10789 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10790 (__mmask16) __B);
10793 extern __inline __mmask16
10794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10795 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10797 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10800 extern __inline __mmask16
10801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10802 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10804 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10807 extern __inline __mmask16
10808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10809 _mm512_knot (__mmask16 __A)
10811 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10814 extern __inline __mmask16
10815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10816 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10818 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10821 extern __inline __mmask16
10822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10823 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10825 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
#ifdef __OPTIMIZE__
/* 128-bit lane insertion into a 512-bit vector; IMM selects the
   destination lane.  Inline functions with -O, macros without, so the
   immediate stays a compile-time constant either way.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D, __imm,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
			  const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D, __imm,
						   (__v16sf)
						   _mm512_setzero_ps (), __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
			 __m128i __D, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D, __imm,
						    (__v16si) __A, __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
			 __m128 __D, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D, __imm,
						   (__v16sf) __A, __B);
}
#else
#define _mm512_maskz_insertf32x4(A, X, Y, C)				\
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),	\
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),	\
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)				\
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),	\
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),	\
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)				\
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),	\
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),		\
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)				\
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),	\
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),		\
    (__mmask16)(B)))
#endif
10898 extern __inline __m512i
10899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900 _mm512_max_epi64 (__m512i __A, __m512i __B)
10902 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10903 (__v8di) __B,
10904 (__v8di)
10905 _mm512_undefined_epi32 (),
10906 (__mmask8) -1);
10909 extern __inline __m512i
10910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10911 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10913 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10914 (__v8di) __B,
10915 (__v8di)
10916 _mm512_setzero_si512 (),
10917 __M);
10920 extern __inline __m512i
10921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10922 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10924 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10925 (__v8di) __B,
10926 (__v8di) __W, __M);
10929 extern __inline __m512i
10930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931 _mm512_min_epi64 (__m512i __A, __m512i __B)
10933 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10934 (__v8di) __B,
10935 (__v8di)
10936 _mm512_undefined_epi32 (),
10937 (__mmask8) -1);
10940 extern __inline __m512i
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10944 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10945 (__v8di) __B,
10946 (__v8di) __W, __M);
10949 extern __inline __m512i
10950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10951 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10953 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10954 (__v8di) __B,
10955 (__v8di)
10956 _mm512_setzero_si512 (),
10957 __M);
10960 extern __inline __m512i
10961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10962 _mm512_max_epu64 (__m512i __A, __m512i __B)
10964 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10965 (__v8di) __B,
10966 (__v8di)
10967 _mm512_undefined_epi32 (),
10968 (__mmask8) -1);
10971 extern __inline __m512i
10972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10975 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10976 (__v8di) __B,
10977 (__v8di)
10978 _mm512_setzero_si512 (),
10979 __M);
10982 extern __inline __m512i
10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10986 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10987 (__v8di) __B,
10988 (__v8di) __W, __M);
10991 extern __inline __m512i
10992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993 _mm512_min_epu64 (__m512i __A, __m512i __B)
10995 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10996 (__v8di) __B,
10997 (__v8di)
10998 _mm512_undefined_epi32 (),
10999 (__mmask8) -1);
11002 extern __inline __m512i
11003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11004 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
11006 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11007 (__v8di) __B,
11008 (__v8di) __W, __M);
11011 extern __inline __m512i
11012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
11015 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
11016 (__v8di) __B,
11017 (__v8di)
11018 _mm512_setzero_si512 (),
11019 __M);
11022 extern __inline __m512i
11023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024 _mm512_max_epi32 (__m512i __A, __m512i __B)
11026 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11027 (__v16si) __B,
11028 (__v16si)
11029 _mm512_undefined_epi32 (),
11030 (__mmask16) -1);
11033 extern __inline __m512i
11034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11035 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11037 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11038 (__v16si) __B,
11039 (__v16si)
11040 _mm512_setzero_si512 (),
11041 __M);
11044 extern __inline __m512i
11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11048 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
11049 (__v16si) __B,
11050 (__v16si) __W, __M);
11053 extern __inline __m512i
11054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11055 _mm512_min_epi32 (__m512i __A, __m512i __B)
11057 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11058 (__v16si) __B,
11059 (__v16si)
11060 _mm512_undefined_epi32 (),
11061 (__mmask16) -1);
11064 extern __inline __m512i
11065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11066 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
11068 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11069 (__v16si) __B,
11070 (__v16si)
11071 _mm512_setzero_si512 (),
11072 __M);
11075 extern __inline __m512i
11076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11079 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11080 (__v16si) __B,
11081 (__v16si) __W, __M);
11084 extern __inline __m512i
11085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11086 _mm512_max_epu32 (__m512i __A, __m512i __B)
11088 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11089 (__v16si) __B,
11090 (__v16si)
11091 _mm512_undefined_epi32 (),
11092 (__mmask16) -1);
11095 extern __inline __m512i
11096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11097 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11099 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11100 (__v16si) __B,
11101 (__v16si)
11102 _mm512_setzero_si512 (),
11103 __M);
11106 extern __inline __m512i
11107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11108 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11110 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11111 (__v16si) __B,
11112 (__v16si) __W, __M);
11115 extern __inline __m512i
11116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117 _mm512_min_epu32 (__m512i __A, __m512i __B)
11119 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11120 (__v16si) __B,
11121 (__v16si)
11122 _mm512_undefined_epi32 (),
11123 (__mmask16) -1);
11126 extern __inline __m512i
11127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11128 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11130 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11131 (__v16si) __B,
11132 (__v16si)
11133 _mm512_setzero_si512 (),
11134 __M);
11137 extern __inline __m512i
11138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11141 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11142 (__v16si) __B,
11143 (__v16si) __W, __M);
11146 extern __inline __m512
11147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
11150 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11151 (__v16sf) __B,
11152 (__v16sf)
11153 _mm512_undefined_ps (),
11154 (__mmask16) -1);
11157 extern __inline __m512
11158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11161 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11162 (__v16sf) __B,
11163 (__v16sf) __W,
11164 (__mmask16) __U);
11167 extern __inline __m512
11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11171 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11172 (__v16sf) __B,
11173 (__v16sf)
11174 _mm512_setzero_ps (),
11175 (__mmask16) __U);
/* Scalar max/min with explicit rounding control __R.  The inline forms
   require __OPTIMIZE__ so that the rounding immediate folds to a
   constant; otherwise the macro fallbacks below are used.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

#else
#define _mm_max_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_maxsd_round(A, B, C)

#define _mm_mask_max_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)

#define _mm_maskz_max_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_max_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_maxss_round(A, B, C)

#define _mm_mask_max_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)

#define _mm_maskz_max_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_min_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_minsd_round(A, B, C)

#define _mm_mask_min_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)

#define _mm_maskz_min_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_min_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_minss_round(A, B, C)

#define _mm_mask_min_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)

#define _mm_maskz_min_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#endif
11346 extern __inline __m512d
11347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11348 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11350 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11351 (__v8df) __W,
11352 (__mmask8) __U);
11355 extern __inline __m512
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11359 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11360 (__v16sf) __W,
11361 (__mmask16) __U);
11364 extern __inline __m512i
11365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11366 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11368 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11369 (__v8di) __W,
11370 (__mmask8) __U);
11373 extern __inline __m512i
11374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11375 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11377 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11378 (__v16si) __W,
11379 (__mmask16) __U);
/* Scalar fused multiply-add/sub with rounding control __R, computed as
   __W * __A +/- __B.  fnm* variants negate the product operand; inline
   forms require __OPTIMIZE__, with macro fallbacks otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   (__v2df) __A,
                                                   (__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  (__v4sf) __A,
                                                  (__v4sf) __B,
                                                  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   (__v2df) __A,
                                                   -(__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  (__v4sf) __A,
                                                  -(__v4sf) __B,
                                                  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   -(__v2df) __A,
                                                   (__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  -(__v4sf) __A,
                                                  (__v4sf) __B,
                                                  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
                                                   -(__v2df) __A,
                                                   -(__v2df) __B,
                                                   __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
                                                  -(__v4sf) __A,
                                                  -(__v4sf) __B,
                                                  __R);
}
#else
#define _mm_fmadd_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)

#define _mm_fmadd_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)

#define _mm_fmsub_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)

#define _mm_fmsub_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
#endif
/* Scalar ordered compare with predicate __P and rounding/SAE control
   __R; returns the comparison result as an int.  */
#ifdef __OPTIMIZE__
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
}
#else
#define _mm_comi_round_ss(A, B, C, D)\
__builtin_ia32_vcomiss(A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D)\
__builtin_ia32_vcomisd(A, B, C, D)
#endif
11509 extern __inline __m512d
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm512_sqrt_pd (__m512d __A)
11513 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11514 (__v8df)
11515 _mm512_undefined_pd (),
11516 (__mmask8) -1,
11517 _MM_FROUND_CUR_DIRECTION);
11520 extern __inline __m512d
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11524 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11525 (__v8df) __W,
11526 (__mmask8) __U,
11527 _MM_FROUND_CUR_DIRECTION);
11530 extern __inline __m512d
11531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11534 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11535 (__v8df)
11536 _mm512_setzero_pd (),
11537 (__mmask8) __U,
11538 _MM_FROUND_CUR_DIRECTION);
11541 extern __inline __m512
11542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543 _mm512_sqrt_ps (__m512 __A)
11545 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11546 (__v16sf)
11547 _mm512_undefined_ps (),
11548 (__mmask16) -1,
11549 _MM_FROUND_CUR_DIRECTION);
11552 extern __inline __m512
11553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11556 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11557 (__v16sf) __W,
11558 (__mmask16) __U,
11559 _MM_FROUND_CUR_DIRECTION);
11562 extern __inline __m512
11563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11566 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11567 (__v16sf)
11568 _mm512_setzero_ps (),
11569 (__mmask16) __U,
11570 _MM_FROUND_CUR_DIRECTION);
11573 extern __inline __m512d
11574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11575 _mm512_add_pd (__m512d __A, __m512d __B)
11577 return (__m512d) ((__v8df)__A + (__v8df)__B);
11580 extern __inline __m512d
11581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11582 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11584 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11585 (__v8df) __B,
11586 (__v8df) __W,
11587 (__mmask8) __U,
11588 _MM_FROUND_CUR_DIRECTION);
11591 extern __inline __m512d
11592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11593 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11595 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11596 (__v8df) __B,
11597 (__v8df)
11598 _mm512_setzero_pd (),
11599 (__mmask8) __U,
11600 _MM_FROUND_CUR_DIRECTION);
11603 extern __inline __m512
11604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11605 _mm512_add_ps (__m512 __A, __m512 __B)
11607 return (__m512) ((__v16sf)__A + (__v16sf)__B);
11610 extern __inline __m512
11611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11614 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11615 (__v16sf) __B,
11616 (__v16sf) __W,
11617 (__mmask16) __U,
11618 _MM_FROUND_CUR_DIRECTION);
11621 extern __inline __m512
11622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11625 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11626 (__v16sf) __B,
11627 (__v16sf)
11628 _mm512_setzero_ps (),
11629 (__mmask16) __U,
11630 _MM_FROUND_CUR_DIRECTION);
11633 extern __inline __m128d
11634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11635 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11637 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11638 (__v2df) __B,
11639 (__v2df) __W,
11640 (__mmask8) __U,
11641 _MM_FROUND_CUR_DIRECTION);
11644 extern __inline __m128d
11645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11646 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11648 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11649 (__v2df) __B,
11650 (__v2df)
11651 _mm_setzero_pd (),
11652 (__mmask8) __U,
11653 _MM_FROUND_CUR_DIRECTION);
11656 extern __inline __m128
11657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11658 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11660 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11661 (__v4sf) __B,
11662 (__v4sf) __W,
11663 (__mmask8) __U,
11664 _MM_FROUND_CUR_DIRECTION);
11667 extern __inline __m128
11668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11669 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11671 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11672 (__v4sf) __B,
11673 (__v4sf)
11674 _mm_setzero_ps (),
11675 (__mmask8) __U,
11676 _MM_FROUND_CUR_DIRECTION);
11679 extern __inline __m512d
11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681 _mm512_sub_pd (__m512d __A, __m512d __B)
11683 return (__m512d) ((__v8df)__A - (__v8df)__B);
11686 extern __inline __m512d
11687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11688 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11690 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11691 (__v8df) __B,
11692 (__v8df) __W,
11693 (__mmask8) __U,
11694 _MM_FROUND_CUR_DIRECTION);
11697 extern __inline __m512d
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11701 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11702 (__v8df) __B,
11703 (__v8df)
11704 _mm512_setzero_pd (),
11705 (__mmask8) __U,
11706 _MM_FROUND_CUR_DIRECTION);
11709 extern __inline __m512
11710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11711 _mm512_sub_ps (__m512 __A, __m512 __B)
11713 return (__m512) ((__v16sf)__A - (__v16sf)__B);
11716 extern __inline __m512
11717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11720 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11721 (__v16sf) __B,
11722 (__v16sf) __W,
11723 (__mmask16) __U,
11724 _MM_FROUND_CUR_DIRECTION);
11727 extern __inline __m512
11728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11729 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11731 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11732 (__v16sf) __B,
11733 (__v16sf)
11734 _mm512_setzero_ps (),
11735 (__mmask16) __U,
11736 _MM_FROUND_CUR_DIRECTION);
11739 extern __inline __m128d
11740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11741 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11743 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11744 (__v2df) __B,
11745 (__v2df) __W,
11746 (__mmask8) __U,
11747 _MM_FROUND_CUR_DIRECTION);
11750 extern __inline __m128d
11751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11752 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11754 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11755 (__v2df) __B,
11756 (__v2df)
11757 _mm_setzero_pd (),
11758 (__mmask8) __U,
11759 _MM_FROUND_CUR_DIRECTION);
11762 extern __inline __m128
11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11766 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11767 (__v4sf) __B,
11768 (__v4sf) __W,
11769 (__mmask8) __U,
11770 _MM_FROUND_CUR_DIRECTION);
11773 extern __inline __m128
11774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11775 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11777 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11778 (__v4sf) __B,
11779 (__v4sf)
11780 _mm_setzero_ps (),
11781 (__mmask8) __U,
11782 _MM_FROUND_CUR_DIRECTION);
11785 extern __inline __m512d
11786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787 _mm512_mul_pd (__m512d __A, __m512d __B)
11789 return (__m512d) ((__v8df)__A * (__v8df)__B);
11792 extern __inline __m512d
11793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11794 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11796 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11797 (__v8df) __B,
11798 (__v8df) __W,
11799 (__mmask8) __U,
11800 _MM_FROUND_CUR_DIRECTION);
11803 extern __inline __m512d
11804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11807 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11808 (__v8df) __B,
11809 (__v8df)
11810 _mm512_setzero_pd (),
11811 (__mmask8) __U,
11812 _MM_FROUND_CUR_DIRECTION);
11815 extern __inline __m512
11816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11817 _mm512_mul_ps (__m512 __A, __m512 __B)
11819 return (__m512) ((__v16sf)__A * (__v16sf)__B);
11822 extern __inline __m512
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11826 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11827 (__v16sf) __B,
11828 (__v16sf) __W,
11829 (__mmask16) __U,
11830 _MM_FROUND_CUR_DIRECTION);
11833 extern __inline __m512
11834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11837 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11838 (__v16sf) __B,
11839 (__v16sf)
11840 _mm512_setzero_ps (),
11841 (__mmask16) __U,
11842 _MM_FROUND_CUR_DIRECTION);
11845 extern __inline __m128d
11846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11847 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11848 __m128d __B)
11850 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11851 (__v2df) __B,
11852 (__v2df) __W,
11853 (__mmask8) __U,
11854 _MM_FROUND_CUR_DIRECTION);
11857 extern __inline __m128d
11858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11861 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11862 (__v2df) __B,
11863 (__v2df)
11864 _mm_setzero_pd (),
11865 (__mmask8) __U,
11866 _MM_FROUND_CUR_DIRECTION);
11869 extern __inline __m128
11870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11871 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11872 __m128 __B)
11874 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11875 (__v4sf) __B,
11876 (__v4sf) __W,
11877 (__mmask8) __U,
11878 _MM_FROUND_CUR_DIRECTION);
11881 extern __inline __m128
11882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11883 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11885 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11886 (__v4sf) __B,
11887 (__v4sf)
11888 _mm_setzero_ps (),
11889 (__mmask8) __U,
11890 _MM_FROUND_CUR_DIRECTION);
11893 extern __inline __m512d
11894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11895 _mm512_div_pd (__m512d __M, __m512d __V)
11897 return (__m512d) ((__v8df)__M / (__v8df)__V);
11900 extern __inline __m512d
11901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11902 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11904 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11905 (__v8df) __V,
11906 (__v8df) __W,
11907 (__mmask8) __U,
11908 _MM_FROUND_CUR_DIRECTION);
11911 extern __inline __m512d
11912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11915 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11916 (__v8df) __V,
11917 (__v8df)
11918 _mm512_setzero_pd (),
11919 (__mmask8) __U,
11920 _MM_FROUND_CUR_DIRECTION);
11923 extern __inline __m512
11924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11925 _mm512_div_ps (__m512 __A, __m512 __B)
11927 return (__m512) ((__v16sf)__A / (__v16sf)__B);
11930 extern __inline __m512
11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11934 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11935 (__v16sf) __B,
11936 (__v16sf) __W,
11937 (__mmask16) __U,
11938 _MM_FROUND_CUR_DIRECTION);
11941 extern __inline __m512
11942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11943 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11945 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11946 (__v16sf) __B,
11947 (__v16sf)
11948 _mm512_setzero_ps (),
11949 (__mmask16) __U,
11950 _MM_FROUND_CUR_DIRECTION);
11953 extern __inline __m128d
11954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11955 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11956 __m128d __B)
11958 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11959 (__v2df) __B,
11960 (__v2df) __W,
11961 (__mmask8) __U,
11962 _MM_FROUND_CUR_DIRECTION);
11965 extern __inline __m128d
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11969 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11970 (__v2df) __B,
11971 (__v2df)
11972 _mm_setzero_pd (),
11973 (__mmask8) __U,
11974 _MM_FROUND_CUR_DIRECTION);
11977 extern __inline __m128
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11980 __m128 __B)
11982 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11983 (__v4sf) __B,
11984 (__v4sf) __W,
11985 (__mmask8) __U,
11986 _MM_FROUND_CUR_DIRECTION);
11989 extern __inline __m128
11990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11991 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11993 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11994 (__v4sf) __B,
11995 (__v4sf)
11996 _mm_setzero_ps (),
11997 (__mmask8) __U,
11998 _MM_FROUND_CUR_DIRECTION);
12001 extern __inline __m512d
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003 _mm512_max_pd (__m512d __A, __m512d __B)
12005 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12006 (__v8df) __B,
12007 (__v8df)
12008 _mm512_undefined_pd (),
12009 (__mmask8) -1,
12010 _MM_FROUND_CUR_DIRECTION);
12013 extern __inline __m512d
12014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12015 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12017 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12018 (__v8df) __B,
12019 (__v8df) __W,
12020 (__mmask8) __U,
12021 _MM_FROUND_CUR_DIRECTION);
12024 extern __inline __m512d
12025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
12028 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
12029 (__v8df) __B,
12030 (__v8df)
12031 _mm512_setzero_pd (),
12032 (__mmask8) __U,
12033 _MM_FROUND_CUR_DIRECTION);
12036 extern __inline __m512
12037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12038 _mm512_max_ps (__m512 __A, __m512 __B)
12040 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12041 (__v16sf) __B,
12042 (__v16sf)
12043 _mm512_undefined_ps (),
12044 (__mmask16) -1,
12045 _MM_FROUND_CUR_DIRECTION);
12048 extern __inline __m512
12049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12050 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12052 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12053 (__v16sf) __B,
12054 (__v16sf) __W,
12055 (__mmask16) __U,
12056 _MM_FROUND_CUR_DIRECTION);
12059 extern __inline __m512
12060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12061 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
12063 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
12064 (__v16sf) __B,
12065 (__v16sf)
12066 _mm512_setzero_ps (),
12067 (__mmask16) __U,
12068 _MM_FROUND_CUR_DIRECTION);
12071 extern __inline __m128d
12072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12075 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12076 (__v2df) __B,
12077 (__v2df) __W,
12078 (__mmask8) __U,
12079 _MM_FROUND_CUR_DIRECTION);
12082 extern __inline __m128d
12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
12086 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12087 (__v2df) __B,
12088 (__v2df)
12089 _mm_setzero_pd (),
12090 (__mmask8) __U,
12091 _MM_FROUND_CUR_DIRECTION);
12094 extern __inline __m128
12095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12098 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12099 (__v4sf) __B,
12100 (__v4sf) __W,
12101 (__mmask8) __U,
12102 _MM_FROUND_CUR_DIRECTION);
12105 extern __inline __m128
12106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12107 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
12109 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12110 (__v4sf) __B,
12111 (__v4sf)
12112 _mm_setzero_ps (),
12113 (__mmask8) __U,
12114 _MM_FROUND_CUR_DIRECTION);
12117 extern __inline __m512d
12118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12119 _mm512_min_pd (__m512d __A, __m512d __B)
12121 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12122 (__v8df) __B,
12123 (__v8df)
12124 _mm512_undefined_pd (),
12125 (__mmask8) -1,
12126 _MM_FROUND_CUR_DIRECTION);
12129 extern __inline __m512d
12130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12131 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12133 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12134 (__v8df) __B,
12135 (__v8df) __W,
12136 (__mmask8) __U,
12137 _MM_FROUND_CUR_DIRECTION);
12140 extern __inline __m512d
12141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12142 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
12144 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12145 (__v8df) __B,
12146 (__v8df)
12147 _mm512_setzero_pd (),
12148 (__mmask8) __U,
12149 _MM_FROUND_CUR_DIRECTION);
12152 extern __inline __m512
12153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154 _mm512_min_ps (__m512 __A, __m512 __B)
12156 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12157 (__v16sf) __B,
12158 (__v16sf)
12159 _mm512_undefined_ps (),
12160 (__mmask16) -1,
12161 _MM_FROUND_CUR_DIRECTION);
12164 extern __inline __m512
12165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12166 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12168 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12169 (__v16sf) __B,
12170 (__v16sf) __W,
12171 (__mmask16) __U,
12172 _MM_FROUND_CUR_DIRECTION);
12175 extern __inline __m512
12176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12177 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
12179 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12180 (__v16sf) __B,
12181 (__v16sf)
12182 _mm512_setzero_ps (),
12183 (__mmask16) __U,
12184 _MM_FROUND_CUR_DIRECTION);
12187 extern __inline __m128d
12188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12189 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12191 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12192 (__v2df) __B,
12193 (__v2df) __W,
12194 (__mmask8) __U,
12195 _MM_FROUND_CUR_DIRECTION);
12198 extern __inline __m128d
12199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12200 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
12202 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12203 (__v2df) __B,
12204 (__v2df)
12205 _mm_setzero_pd (),
12206 (__mmask8) __U,
12207 _MM_FROUND_CUR_DIRECTION);
12210 extern __inline __m128
12211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12212 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12214 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12215 (__v4sf) __B,
12216 (__v4sf) __W,
12217 (__mmask8) __U,
12218 _MM_FROUND_CUR_DIRECTION);
12221 extern __inline __m128
12222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12223 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
12225 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12226 (__v4sf) __B,
12227 (__v4sf)
12228 _mm_setzero_ps (),
12229 (__mmask8) __U,
12230 _MM_FROUND_CUR_DIRECTION);
12233 extern __inline __m512d
12234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235 _mm512_scalef_pd (__m512d __A, __m512d __B)
12237 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12238 (__v8df) __B,
12239 (__v8df)
12240 _mm512_undefined_pd (),
12241 (__mmask8) -1,
12242 _MM_FROUND_CUR_DIRECTION);
12245 extern __inline __m512d
12246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12247 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12249 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12250 (__v8df) __B,
12251 (__v8df) __W,
12252 (__mmask8) __U,
12253 _MM_FROUND_CUR_DIRECTION);
12256 extern __inline __m512d
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
12260 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12261 (__v8df) __B,
12262 (__v8df)
12263 _mm512_setzero_pd (),
12264 (__mmask8) __U,
12265 _MM_FROUND_CUR_DIRECTION);
12268 extern __inline __m512
12269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12270 _mm512_scalef_ps (__m512 __A, __m512 __B)
12272 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12273 (__v16sf) __B,
12274 (__v16sf)
12275 _mm512_undefined_ps (),
12276 (__mmask16) -1,
12277 _MM_FROUND_CUR_DIRECTION);
12280 extern __inline __m512
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12284 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12285 (__v16sf) __B,
12286 (__v16sf) __W,
12287 (__mmask16) __U,
12288 _MM_FROUND_CUR_DIRECTION);
12291 extern __inline __m512
12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
12295 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12296 (__v16sf) __B,
12297 (__v16sf)
12298 _mm512_setzero_ps (),
12299 (__mmask16) __U,
12300 _MM_FROUND_CUR_DIRECTION);
12303 extern __inline __m128d
12304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12305 _mm_scalef_sd (__m128d __A, __m128d __B)
12307 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
12308 (__v2df) __B,
12309 (__v2df)
12310 _mm_setzero_pd (),
12311 (__mmask8) -1,
12312 _MM_FROUND_CUR_DIRECTION);
12315 extern __inline __m128
12316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12317 _mm_scalef_ss (__m128 __A, __m128 __B)
12319 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
12320 (__v4sf) __B,
12321 (__v4sf)
12322 _mm_setzero_ps (),
12323 (__mmask8) -1,
12324 _MM_FROUND_CUR_DIRECTION);
12327 extern __inline __m512d
12328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12329 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12331 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12332 (__v8df) __B,
12333 (__v8df) __C,
12334 (__mmask8) -1,
12335 _MM_FROUND_CUR_DIRECTION);
12338 extern __inline __m512d
12339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12342 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12343 (__v8df) __B,
12344 (__v8df) __C,
12345 (__mmask8) __U,
12346 _MM_FROUND_CUR_DIRECTION);
12349 extern __inline __m512d
12350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12351 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12353 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12354 (__v8df) __B,
12355 (__v8df) __C,
12356 (__mmask8) __U,
12357 _MM_FROUND_CUR_DIRECTION);
12360 extern __inline __m512d
12361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12362 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12364 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12365 (__v8df) __B,
12366 (__v8df) __C,
12367 (__mmask8) __U,
12368 _MM_FROUND_CUR_DIRECTION);
12371 extern __inline __m512
12372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12373 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12375 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12376 (__v16sf) __B,
12377 (__v16sf) __C,
12378 (__mmask16) -1,
12379 _MM_FROUND_CUR_DIRECTION);
12382 extern __inline __m512
12383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12384 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12386 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12387 (__v16sf) __B,
12388 (__v16sf) __C,
12389 (__mmask16) __U,
12390 _MM_FROUND_CUR_DIRECTION);
12393 extern __inline __m512
12394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12395 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12397 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12398 (__v16sf) __B,
12399 (__v16sf) __C,
12400 (__mmask16) __U,
12401 _MM_FROUND_CUR_DIRECTION);
12404 extern __inline __m512
12405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12408 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12409 (__v16sf) __B,
12410 (__v16sf) __C,
12411 (__mmask16) __U,
12412 _MM_FROUND_CUR_DIRECTION);
12415 extern __inline __m512d
12416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12417 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12419 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
12420 (__v8df) __B,
12421 (__v8df) __C,
12422 (__mmask8) -1,
12423 _MM_FROUND_CUR_DIRECTION);
12426 extern __inline __m512d
12427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12428 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12430 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
12431 (__v8df) __B,
12432 (__v8df) __C,
12433 (__mmask8) __U,
12434 _MM_FROUND_CUR_DIRECTION);
12437 extern __inline __m512d
12438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12439 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12441 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12442 (__v8df) __B,
12443 (__v8df) __C,
12444 (__mmask8) __U,
12445 _MM_FROUND_CUR_DIRECTION);
12448 extern __inline __m512d
12449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12450 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12452 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
12453 (__v8df) __B,
12454 (__v8df) __C,
12455 (__mmask8) __U,
12456 _MM_FROUND_CUR_DIRECTION);
12459 extern __inline __m512
12460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12461 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12463 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
12464 (__v16sf) __B,
12465 (__v16sf) __C,
12466 (__mmask16) -1,
12467 _MM_FROUND_CUR_DIRECTION);
12470 extern __inline __m512
12471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12472 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12474 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
12475 (__v16sf) __B,
12476 (__v16sf) __C,
12477 (__mmask16) __U,
12478 _MM_FROUND_CUR_DIRECTION);
12481 extern __inline __m512
12482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12483 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12485 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12486 (__v16sf) __B,
12487 (__v16sf) __C,
12488 (__mmask16) __U,
12489 _MM_FROUND_CUR_DIRECTION);
12492 extern __inline __m512
12493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12494 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12496 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
12497 (__v16sf) __B,
12498 (__v16sf) __C,
12499 (__mmask16) __U,
12500 _MM_FROUND_CUR_DIRECTION);
12503 extern __inline __m512d
12504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12505 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12507 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12508 (__v8df) __B,
12509 (__v8df) __C,
12510 (__mmask8) -1,
12511 _MM_FROUND_CUR_DIRECTION);
12514 extern __inline __m512d
12515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12516 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12518 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12519 (__v8df) __B,
12520 (__v8df) __C,
12521 (__mmask8) __U,
12522 _MM_FROUND_CUR_DIRECTION);
12525 extern __inline __m512d
12526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12527 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12529 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12530 (__v8df) __B,
12531 (__v8df) __C,
12532 (__mmask8) __U,
12533 _MM_FROUND_CUR_DIRECTION);
12536 extern __inline __m512d
12537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12538 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12540 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12541 (__v8df) __B,
12542 (__v8df) __C,
12543 (__mmask8) __U,
12544 _MM_FROUND_CUR_DIRECTION);
12547 extern __inline __m512
12548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12549 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12551 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12552 (__v16sf) __B,
12553 (__v16sf) __C,
12554 (__mmask16) -1,
12555 _MM_FROUND_CUR_DIRECTION);
12558 extern __inline __m512
12559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12560 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12562 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12563 (__v16sf) __B,
12564 (__v16sf) __C,
12565 (__mmask16) __U,
12566 _MM_FROUND_CUR_DIRECTION);
12569 extern __inline __m512
12570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12571 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12573 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12574 (__v16sf) __B,
12575 (__v16sf) __C,
12576 (__mmask16) __U,
12577 _MM_FROUND_CUR_DIRECTION);
12580 extern __inline __m512
12581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12582 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12584 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12585 (__v16sf) __B,
12586 (__v16sf) __C,
12587 (__mmask16) __U,
12588 _MM_FROUND_CUR_DIRECTION);
12591 extern __inline __m512d
12592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12593 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12595 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12596 (__v8df) __B,
12597 -(__v8df) __C,
12598 (__mmask8) -1,
12599 _MM_FROUND_CUR_DIRECTION);
12602 extern __inline __m512d
12603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12604 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12606 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12607 (__v8df) __B,
12608 -(__v8df) __C,
12609 (__mmask8) __U,
12610 _MM_FROUND_CUR_DIRECTION);
12613 extern __inline __m512d
12614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12615 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12617 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12618 (__v8df) __B,
12619 (__v8df) __C,
12620 (__mmask8) __U,
12621 _MM_FROUND_CUR_DIRECTION);
12624 extern __inline __m512d
12625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12626 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12628 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12629 (__v8df) __B,
12630 -(__v8df) __C,
12631 (__mmask8) __U,
12632 _MM_FROUND_CUR_DIRECTION);
12635 extern __inline __m512
12636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12637 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12639 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12640 (__v16sf) __B,
12641 -(__v16sf) __C,
12642 (__mmask16) -1,
12643 _MM_FROUND_CUR_DIRECTION);
12646 extern __inline __m512
12647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12648 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12650 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12651 (__v16sf) __B,
12652 -(__v16sf) __C,
12653 (__mmask16) __U,
12654 _MM_FROUND_CUR_DIRECTION);
12657 extern __inline __m512
12658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12659 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12661 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12662 (__v16sf) __B,
12663 (__v16sf) __C,
12664 (__mmask16) __U,
12665 _MM_FROUND_CUR_DIRECTION);
12668 extern __inline __m512
12669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12670 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12672 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12673 (__v16sf) __B,
12674 -(__v16sf) __C,
12675 (__mmask16) __U,
12676 _MM_FROUND_CUR_DIRECTION);
12679 extern __inline __m512d
12680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12681 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12683 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12684 (__v8df) __B,
12685 (__v8df) __C,
12686 (__mmask8) -1,
12687 _MM_FROUND_CUR_DIRECTION);
12690 extern __inline __m512d
12691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12692 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12694 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12695 (__v8df) __B,
12696 (__v8df) __C,
12697 (__mmask8) __U,
12698 _MM_FROUND_CUR_DIRECTION);
12701 extern __inline __m512d
12702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12703 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12705 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
12706 (__v8df) __B,
12707 (__v8df) __C,
12708 (__mmask8) __U,
12709 _MM_FROUND_CUR_DIRECTION);
12712 extern __inline __m512d
12713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12714 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12716 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
12717 (__v8df) __B,
12718 (__v8df) __C,
12719 (__mmask8) __U,
12720 _MM_FROUND_CUR_DIRECTION);
12723 extern __inline __m512
12724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12725 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12727 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12728 (__v16sf) __B,
12729 (__v16sf) __C,
12730 (__mmask16) -1,
12731 _MM_FROUND_CUR_DIRECTION);
12734 extern __inline __m512
12735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12736 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12738 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12739 (__v16sf) __B,
12740 (__v16sf) __C,
12741 (__mmask16) __U,
12742 _MM_FROUND_CUR_DIRECTION);
12745 extern __inline __m512
12746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12747 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12749 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
12750 (__v16sf) __B,
12751 (__v16sf) __C,
12752 (__mmask16) __U,
12753 _MM_FROUND_CUR_DIRECTION);
12756 extern __inline __m512
12757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12758 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12760 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
12761 (__v16sf) __B,
12762 (__v16sf) __C,
12763 (__mmask16) __U,
12764 _MM_FROUND_CUR_DIRECTION);
12767 extern __inline __m512d
12768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12769 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12771 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12772 (__v8df) __B,
12773 (__v8df) __C,
12774 (__mmask8) -1,
12775 _MM_FROUND_CUR_DIRECTION);
12778 extern __inline __m512d
12779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12780 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12782 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12783 (__v8df) __B,
12784 (__v8df) __C,
12785 (__mmask8) __U,
12786 _MM_FROUND_CUR_DIRECTION);
12789 extern __inline __m512d
12790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12791 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12793 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12794 (__v8df) __B,
12795 (__v8df) __C,
12796 (__mmask8) __U,
12797 _MM_FROUND_CUR_DIRECTION);
12800 extern __inline __m512d
12801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12802 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12804 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
12805 (__v8df) __B,
12806 (__v8df) __C,
12807 (__mmask8) __U,
12808 _MM_FROUND_CUR_DIRECTION);
12811 extern __inline __m512
12812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12813 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12815 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12816 (__v16sf) __B,
12817 (__v16sf) __C,
12818 (__mmask16) -1,
12819 _MM_FROUND_CUR_DIRECTION);
12822 extern __inline __m512
12823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12824 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12826 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12827 (__v16sf) __B,
12828 (__v16sf) __C,
12829 (__mmask16) __U,
12830 _MM_FROUND_CUR_DIRECTION);
12833 extern __inline __m512
12834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12835 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12837 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12838 (__v16sf) __B,
12839 (__v16sf) __C,
12840 (__mmask16) __U,
12841 _MM_FROUND_CUR_DIRECTION);
12844 extern __inline __m512
12845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12846 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12848 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
12849 (__v16sf) __B,
12850 (__v16sf) __C,
12851 (__mmask16) __U,
12852 _MM_FROUND_CUR_DIRECTION);
12855 extern __inline __m256i
12856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12857 _mm512_cvttpd_epi32 (__m512d __A)
12859 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12860 (__v8si)
12861 _mm256_undefined_si256 (),
12862 (__mmask8) -1,
12863 _MM_FROUND_CUR_DIRECTION);
12866 extern __inline __m256i
12867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12868 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12870 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12871 (__v8si) __W,
12872 (__mmask8) __U,
12873 _MM_FROUND_CUR_DIRECTION);
12876 extern __inline __m256i
12877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12878 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12880 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12881 (__v8si)
12882 _mm256_setzero_si256 (),
12883 (__mmask8) __U,
12884 _MM_FROUND_CUR_DIRECTION);
12887 extern __inline __m256i
12888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12889 _mm512_cvttpd_epu32 (__m512d __A)
12891 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12892 (__v8si)
12893 _mm256_undefined_si256 (),
12894 (__mmask8) -1,
12895 _MM_FROUND_CUR_DIRECTION);
12898 extern __inline __m256i
12899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12900 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12902 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12903 (__v8si) __W,
12904 (__mmask8) __U,
12905 _MM_FROUND_CUR_DIRECTION);
12908 extern __inline __m256i
12909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12910 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12912 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12913 (__v8si)
12914 _mm256_setzero_si256 (),
12915 (__mmask8) __U,
12916 _MM_FROUND_CUR_DIRECTION);
12919 extern __inline __m256i
12920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12921 _mm512_cvtpd_epi32 (__m512d __A)
12923 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12924 (__v8si)
12925 _mm256_undefined_si256 (),
12926 (__mmask8) -1,
12927 _MM_FROUND_CUR_DIRECTION);
12930 extern __inline __m256i
12931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12932 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12934 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12935 (__v8si) __W,
12936 (__mmask8) __U,
12937 _MM_FROUND_CUR_DIRECTION);
12940 extern __inline __m256i
12941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12942 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12944 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12945 (__v8si)
12946 _mm256_setzero_si256 (),
12947 (__mmask8) __U,
12948 _MM_FROUND_CUR_DIRECTION);
12951 extern __inline __m256i
12952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12953 _mm512_cvtpd_epu32 (__m512d __A)
12955 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12956 (__v8si)
12957 _mm256_undefined_si256 (),
12958 (__mmask8) -1,
12959 _MM_FROUND_CUR_DIRECTION);
12962 extern __inline __m256i
12963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12964 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12966 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12967 (__v8si) __W,
12968 (__mmask8) __U,
12969 _MM_FROUND_CUR_DIRECTION);
12972 extern __inline __m256i
12973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12974 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12976 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12977 (__v8si)
12978 _mm256_setzero_si256 (),
12979 (__mmask8) __U,
12980 _MM_FROUND_CUR_DIRECTION);
12983 extern __inline __m512i
12984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12985 _mm512_cvttps_epi32 (__m512 __A)
12987 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12988 (__v16si)
12989 _mm512_undefined_epi32 (),
12990 (__mmask16) -1,
12991 _MM_FROUND_CUR_DIRECTION);
12994 extern __inline __m512i
12995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12996 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12998 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12999 (__v16si) __W,
13000 (__mmask16) __U,
13001 _MM_FROUND_CUR_DIRECTION);
13004 extern __inline __m512i
13005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13006 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
13008 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
13009 (__v16si)
13010 _mm512_setzero_si512 (),
13011 (__mmask16) __U,
13012 _MM_FROUND_CUR_DIRECTION);
13015 extern __inline __m512i
13016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13017 _mm512_cvttps_epu32 (__m512 __A)
13019 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13020 (__v16si)
13021 _mm512_undefined_epi32 (),
13022 (__mmask16) -1,
13023 _MM_FROUND_CUR_DIRECTION);
13026 extern __inline __m512i
13027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13028 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13030 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13031 (__v16si) __W,
13032 (__mmask16) __U,
13033 _MM_FROUND_CUR_DIRECTION);
13036 extern __inline __m512i
13037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13038 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
13040 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
13041 (__v16si)
13042 _mm512_setzero_si512 (),
13043 (__mmask16) __U,
13044 _MM_FROUND_CUR_DIRECTION);
13047 extern __inline __m512i
13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13049 _mm512_cvtps_epi32 (__m512 __A)
13051 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13052 (__v16si)
13053 _mm512_undefined_epi32 (),
13054 (__mmask16) -1,
13055 _MM_FROUND_CUR_DIRECTION);
13058 extern __inline __m512i
13059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13060 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
13062 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13063 (__v16si) __W,
13064 (__mmask16) __U,
13065 _MM_FROUND_CUR_DIRECTION);
13068 extern __inline __m512i
13069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13070 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
13072 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13073 (__v16si)
13074 _mm512_setzero_si512 (),
13075 (__mmask16) __U,
13076 _MM_FROUND_CUR_DIRECTION);
13079 extern __inline __m512i
13080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13081 _mm512_cvtps_epu32 (__m512 __A)
13083 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13084 (__v16si)
13085 _mm512_undefined_epi32 (),
13086 (__mmask16) -1,
13087 _MM_FROUND_CUR_DIRECTION);
13090 extern __inline __m512i
13091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13092 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13094 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13095 (__v16si) __W,
13096 (__mmask16) __U,
13097 _MM_FROUND_CUR_DIRECTION);
13100 extern __inline __m512i
13101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13102 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
13104 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13105 (__v16si)
13106 _mm512_setzero_si512 (),
13107 (__mmask16) __U,
13108 _MM_FROUND_CUR_DIRECTION);
13111 extern __inline double
13112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13113 _mm512_cvtsd_f64 (__m512d __A)
13115 return __A[0];
13118 extern __inline float
13119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13120 _mm512_cvtss_f32 (__m512 __A)
13122 return __A[0];
13125 #ifdef __x86_64__
13126 extern __inline __m128
13127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13128 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
13130 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
13131 _MM_FROUND_CUR_DIRECTION);
13134 extern __inline __m128d
13135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13136 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
13138 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
13139 _MM_FROUND_CUR_DIRECTION);
13141 #endif
13143 extern __inline __m128
13144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13145 _mm_cvtu32_ss (__m128 __A, unsigned __B)
13147 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
13148 _MM_FROUND_CUR_DIRECTION);
13151 extern __inline __m512
13152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13153 _mm512_cvtepi32_ps (__m512i __A)
13155 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13156 (__v16sf)
13157 _mm512_undefined_ps (),
13158 (__mmask16) -1,
13159 _MM_FROUND_CUR_DIRECTION);
13162 extern __inline __m512
13163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13164 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13166 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13167 (__v16sf) __W,
13168 (__mmask16) __U,
13169 _MM_FROUND_CUR_DIRECTION);
13172 extern __inline __m512
13173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13174 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
13176 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13177 (__v16sf)
13178 _mm512_setzero_ps (),
13179 (__mmask16) __U,
13180 _MM_FROUND_CUR_DIRECTION);
13183 extern __inline __m512
13184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13185 _mm512_cvtepu32_ps (__m512i __A)
13187 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13188 (__v16sf)
13189 _mm512_undefined_ps (),
13190 (__mmask16) -1,
13191 _MM_FROUND_CUR_DIRECTION);
13194 extern __inline __m512
13195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13196 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13198 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13199 (__v16sf) __W,
13200 (__mmask16) __U,
13201 _MM_FROUND_CUR_DIRECTION);
13204 extern __inline __m512
13205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13206 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
13208 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13209 (__v16sf)
13210 _mm512_setzero_ps (),
13211 (__mmask16) __U,
13212 _MM_FROUND_CUR_DIRECTION);
13215 #ifdef __OPTIMIZE__
13216 extern __inline __m512d
13217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13218 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
13220 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13221 (__v8df) __B,
13222 (__v8di) __C,
13223 __imm,
13224 (__mmask8) -1,
13225 _MM_FROUND_CUR_DIRECTION);
13228 extern __inline __m512d
13229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13230 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
13231 __m512i __C, const int __imm)
13233 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13234 (__v8df) __B,
13235 (__v8di) __C,
13236 __imm,
13237 (__mmask8) __U,
13238 _MM_FROUND_CUR_DIRECTION);
13241 extern __inline __m512d
13242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13243 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
13244 __m512i __C, const int __imm)
13246 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
13247 (__v8df) __B,
13248 (__v8di) __C,
13249 __imm,
13250 (__mmask8) __U,
13251 _MM_FROUND_CUR_DIRECTION);
13254 extern __inline __m512
13255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13256 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
13258 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13259 (__v16sf) __B,
13260 (__v16si) __C,
13261 __imm,
13262 (__mmask16) -1,
13263 _MM_FROUND_CUR_DIRECTION);
13266 extern __inline __m512
13267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13268 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
13269 __m512i __C, const int __imm)
13271 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13272 (__v16sf) __B,
13273 (__v16si) __C,
13274 __imm,
13275 (__mmask16) __U,
13276 _MM_FROUND_CUR_DIRECTION);
13279 extern __inline __m512
13280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13281 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
13282 __m512i __C, const int __imm)
13284 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
13285 (__v16sf) __B,
13286 (__v16si) __C,
13287 __imm,
13288 (__mmask16) __U,
13289 _MM_FROUND_CUR_DIRECTION);
13292 extern __inline __m128d
13293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13294 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
13296 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13297 (__v2df) __B,
13298 (__v2di) __C, __imm,
13299 (__mmask8) -1,
13300 _MM_FROUND_CUR_DIRECTION);
13303 extern __inline __m128d
13304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13305 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
13306 __m128i __C, const int __imm)
13308 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13309 (__v2df) __B,
13310 (__v2di) __C, __imm,
13311 (__mmask8) __U,
13312 _MM_FROUND_CUR_DIRECTION);
13315 extern __inline __m128d
13316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13317 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
13318 __m128i __C, const int __imm)
13320 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
13321 (__v2df) __B,
13322 (__v2di) __C,
13323 __imm,
13324 (__mmask8) __U,
13325 _MM_FROUND_CUR_DIRECTION);
13328 extern __inline __m128
13329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13330 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
13332 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13333 (__v4sf) __B,
13334 (__v4si) __C, __imm,
13335 (__mmask8) -1,
13336 _MM_FROUND_CUR_DIRECTION);
13339 extern __inline __m128
13340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13341 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13342 __m128i __C, const int __imm)
13344 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13345 (__v4sf) __B,
13346 (__v4si) __C, __imm,
13347 (__mmask8) __U,
13348 _MM_FROUND_CUR_DIRECTION);
13351 extern __inline __m128
13352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13353 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13354 __m128i __C, const int __imm)
13356 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13357 (__v4sf) __B,
13358 (__v4si) __C, __imm,
13359 (__mmask8) __U,
13360 _MM_FROUND_CUR_DIRECTION);
13362 #else
13363 #define _mm512_fixupimm_pd(X, Y, Z, C) \
13364 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13365 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13366 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13368 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
13369 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13370 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13371 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13373 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
13374 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
13375 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13376 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13378 #define _mm512_fixupimm_ps(X, Y, Z, C) \
13379 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13380 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13381 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
13383 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
13384 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13385 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13386 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13388 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
13389 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
13390 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13391 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13393 #define _mm_fixupimm_sd(X, Y, Z, C) \
13394 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13395 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13396 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13398 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
13399 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13400 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13401 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13403 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
13404 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
13405 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13406 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13408 #define _mm_fixupimm_ss(X, Y, Z, C) \
13409 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13410 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13411 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13413 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
13414 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13415 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13416 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13418 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
13419 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
13420 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13421 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13422 #endif
13424 #ifdef __x86_64__
13425 extern __inline unsigned long long
13426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13427 _mm_cvtss_u64 (__m128 __A)
13429 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13430 __A,
13431 _MM_FROUND_CUR_DIRECTION);
13434 extern __inline unsigned long long
13435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13436 _mm_cvttss_u64 (__m128 __A)
13438 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13439 __A,
13440 _MM_FROUND_CUR_DIRECTION);
13443 extern __inline long long
13444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13445 _mm_cvttss_i64 (__m128 __A)
13447 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13448 _MM_FROUND_CUR_DIRECTION);
13450 #endif /* __x86_64__ */
13452 extern __inline unsigned
13453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13454 _mm_cvtss_u32 (__m128 __A)
13456 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13457 _MM_FROUND_CUR_DIRECTION);
13460 extern __inline unsigned
13461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13462 _mm_cvttss_u32 (__m128 __A)
13464 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13465 _MM_FROUND_CUR_DIRECTION);
13468 extern __inline int
13469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13470 _mm_cvttss_i32 (__m128 __A)
13472 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13473 _MM_FROUND_CUR_DIRECTION);
13476 #ifdef __x86_64__
13477 extern __inline unsigned long long
13478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13479 _mm_cvtsd_u64 (__m128d __A)
13481 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13482 __A,
13483 _MM_FROUND_CUR_DIRECTION);
13486 extern __inline unsigned long long
13487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13488 _mm_cvttsd_u64 (__m128d __A)
13490 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13491 __A,
13492 _MM_FROUND_CUR_DIRECTION);
13495 extern __inline long long
13496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13497 _mm_cvttsd_i64 (__m128d __A)
13499 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13500 _MM_FROUND_CUR_DIRECTION);
13502 #endif /* __x86_64__ */
13504 extern __inline unsigned
13505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13506 _mm_cvtsd_u32 (__m128d __A)
13508 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13509 _MM_FROUND_CUR_DIRECTION);
13512 extern __inline unsigned
13513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13514 _mm_cvttsd_u32 (__m128d __A)
13516 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13517 _MM_FROUND_CUR_DIRECTION);
13520 extern __inline int
13521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13522 _mm_cvttsd_i32 (__m128d __A)
13524 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13525 _MM_FROUND_CUR_DIRECTION);
13528 extern __inline __m512d
13529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13530 _mm512_cvtps_pd (__m256 __A)
13532 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13533 (__v8df)
13534 _mm512_undefined_pd (),
13535 (__mmask8) -1,
13536 _MM_FROUND_CUR_DIRECTION);
13539 extern __inline __m512d
13540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13541 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13543 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13544 (__v8df) __W,
13545 (__mmask8) __U,
13546 _MM_FROUND_CUR_DIRECTION);
13549 extern __inline __m512d
13550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13551 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13553 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13554 (__v8df)
13555 _mm512_setzero_pd (),
13556 (__mmask8) __U,
13557 _MM_FROUND_CUR_DIRECTION);
13560 extern __inline __m512
13561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13562 _mm512_cvtph_ps (__m256i __A)
13564 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13565 (__v16sf)
13566 _mm512_undefined_ps (),
13567 (__mmask16) -1,
13568 _MM_FROUND_CUR_DIRECTION);
13571 extern __inline __m512
13572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13573 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13575 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13576 (__v16sf) __W,
13577 (__mmask16) __U,
13578 _MM_FROUND_CUR_DIRECTION);
13581 extern __inline __m512
13582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13583 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13585 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13586 (__v16sf)
13587 _mm512_setzero_ps (),
13588 (__mmask16) __U,
13589 _MM_FROUND_CUR_DIRECTION);
13592 extern __inline __m256
13593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13594 _mm512_cvtpd_ps (__m512d __A)
13596 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13597 (__v8sf)
13598 _mm256_undefined_ps (),
13599 (__mmask8) -1,
13600 _MM_FROUND_CUR_DIRECTION);
13603 extern __inline __m256
13604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13605 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13607 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13608 (__v8sf) __W,
13609 (__mmask8) __U,
13610 _MM_FROUND_CUR_DIRECTION);
13613 extern __inline __m256
13614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13615 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13617 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13618 (__v8sf)
13619 _mm256_setzero_ps (),
13620 (__mmask8) __U,
13621 _MM_FROUND_CUR_DIRECTION);
13624 #ifdef __OPTIMIZE__
13625 extern __inline __m512
13626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13627 _mm512_getexp_ps (__m512 __A)
13629 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13630 (__v16sf)
13631 _mm512_undefined_ps (),
13632 (__mmask16) -1,
13633 _MM_FROUND_CUR_DIRECTION);
13636 extern __inline __m512
13637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13638 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13640 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13641 (__v16sf) __W,
13642 (__mmask16) __U,
13643 _MM_FROUND_CUR_DIRECTION);
13646 extern __inline __m512
13647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13648 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13650 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13651 (__v16sf)
13652 _mm512_setzero_ps (),
13653 (__mmask16) __U,
13654 _MM_FROUND_CUR_DIRECTION);
13657 extern __inline __m512d
13658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13659 _mm512_getexp_pd (__m512d __A)
13661 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13662 (__v8df)
13663 _mm512_undefined_pd (),
13664 (__mmask8) -1,
13665 _MM_FROUND_CUR_DIRECTION);
13668 extern __inline __m512d
13669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13670 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13672 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13673 (__v8df) __W,
13674 (__mmask8) __U,
13675 _MM_FROUND_CUR_DIRECTION);
13678 extern __inline __m512d
13679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13680 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13682 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13683 (__v8df)
13684 _mm512_setzero_pd (),
13685 (__mmask8) __U,
13686 _MM_FROUND_CUR_DIRECTION);
13689 extern __inline __m128
13690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13691 _mm_getexp_ss (__m128 __A, __m128 __B)
13693 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13694 (__v4sf) __B,
13695 _MM_FROUND_CUR_DIRECTION);
13698 extern __inline __m128
13699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13700 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13702 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13703 (__v4sf) __B,
13704 (__v4sf) __W,
13705 (__mmask8) __U,
13706 _MM_FROUND_CUR_DIRECTION);
13709 extern __inline __m128
13710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13711 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
13713 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13714 (__v4sf) __B,
13715 (__v4sf)
13716 _mm_setzero_ps (),
13717 (__mmask8) __U,
13718 _MM_FROUND_CUR_DIRECTION);
13721 extern __inline __m128d
13722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13723 _mm_getexp_sd (__m128d __A, __m128d __B)
13725 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13726 (__v2df) __B,
13727 _MM_FROUND_CUR_DIRECTION);
13730 extern __inline __m128d
13731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13732 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13734 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13735 (__v2df) __B,
13736 (__v2df) __W,
13737 (__mmask8) __U,
13738 _MM_FROUND_CUR_DIRECTION);
13741 extern __inline __m128d
13742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13743 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
13745 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13746 (__v2df) __B,
13747 (__v2df)
13748 _mm_setzero_pd (),
13749 (__mmask8) __U,
13750 _MM_FROUND_CUR_DIRECTION);
13753 extern __inline __m512d
13754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13755 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13756 _MM_MANTISSA_SIGN_ENUM __C)
13758 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13759 (__C << 2) | __B,
13760 _mm512_undefined_pd (),
13761 (__mmask8) -1,
13762 _MM_FROUND_CUR_DIRECTION);
13765 extern __inline __m512d
13766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13767 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13768 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13770 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13771 (__C << 2) | __B,
13772 (__v8df) __W, __U,
13773 _MM_FROUND_CUR_DIRECTION);
13776 extern __inline __m512d
13777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13778 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13779 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13781 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13782 (__C << 2) | __B,
13783 (__v8df)
13784 _mm512_setzero_pd (),
13785 __U,
13786 _MM_FROUND_CUR_DIRECTION);
13789 extern __inline __m512
13790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13791 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13792 _MM_MANTISSA_SIGN_ENUM __C)
13794 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13795 (__C << 2) | __B,
13796 _mm512_undefined_ps (),
13797 (__mmask16) -1,
13798 _MM_FROUND_CUR_DIRECTION);
13801 extern __inline __m512
13802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13803 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13804 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13806 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13807 (__C << 2) | __B,
13808 (__v16sf) __W, __U,
13809 _MM_FROUND_CUR_DIRECTION);
13812 extern __inline __m512
13813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13814 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13815 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13817 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13818 (__C << 2) | __B,
13819 (__v16sf)
13820 _mm512_setzero_ps (),
13821 __U,
13822 _MM_FROUND_CUR_DIRECTION);
13825 extern __inline __m128d
13826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13827 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13828 _MM_MANTISSA_SIGN_ENUM __D)
13830 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13831 (__v2df) __B,
13832 (__D << 2) | __C,
13833 _MM_FROUND_CUR_DIRECTION);
13836 extern __inline __m128d
13837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13838 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
13839 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13841 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13842 (__v2df) __B,
13843 (__D << 2) | __C,
13844 (__v2df) __W,
13845 __U,
13846 _MM_FROUND_CUR_DIRECTION);
13849 extern __inline __m128d
13850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13851 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
13852 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13854 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13855 (__v2df) __B,
13856 (__D << 2) | __C,
13857 (__v2df)
13858 _mm_setzero_pd(),
13859 __U,
13860 _MM_FROUND_CUR_DIRECTION);
13863 extern __inline __m128
13864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13865 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13866 _MM_MANTISSA_SIGN_ENUM __D)
13868 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13869 (__v4sf) __B,
13870 (__D << 2) | __C,
13871 _MM_FROUND_CUR_DIRECTION);
13874 extern __inline __m128
13875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13876 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
13877 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13879 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13880 (__v4sf) __B,
13881 (__D << 2) | __C,
13882 (__v4sf) __W,
13883 __U,
13884 _MM_FROUND_CUR_DIRECTION);
13887 extern __inline __m128
13888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13889 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
13890 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13892 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13893 (__v4sf) __B,
13894 (__D << 2) | __C,
13895 (__v4sf)
13896 _mm_setzero_ps(),
13897 __U,
13898 _MM_FROUND_CUR_DIRECTION);
13901 #else
13902 #define _mm512_getmant_pd(X, B, C) \
13903 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13904 (int)(((C)<<2) | (B)), \
13905 (__v8df)_mm512_undefined_pd(), \
13906 (__mmask8)-1,\
13907 _MM_FROUND_CUR_DIRECTION))
13909 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
13910 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13911 (int)(((C)<<2) | (B)), \
13912 (__v8df)(__m512d)(W), \
13913 (__mmask8)(U),\
13914 _MM_FROUND_CUR_DIRECTION))
13916 #define _mm512_maskz_getmant_pd(U, X, B, C) \
13917 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13918 (int)(((C)<<2) | (B)), \
13919 (__v8df)_mm512_setzero_pd(), \
13920 (__mmask8)(U),\
13921 _MM_FROUND_CUR_DIRECTION))
13922 #define _mm512_getmant_ps(X, B, C) \
13923 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13924 (int)(((C)<<2) | (B)), \
13925 (__v16sf)_mm512_undefined_ps(), \
13926 (__mmask16)-1,\
13927 _MM_FROUND_CUR_DIRECTION))
13929 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
13930 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13931 (int)(((C)<<2) | (B)), \
13932 (__v16sf)(__m512)(W), \
13933 (__mmask16)(U),\
13934 _MM_FROUND_CUR_DIRECTION))
13936 #define _mm512_maskz_getmant_ps(U, X, B, C) \
13937 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13938 (int)(((C)<<2) | (B)), \
13939 (__v16sf)_mm512_setzero_ps(), \
13940 (__mmask16)(U),\
13941 _MM_FROUND_CUR_DIRECTION))
/* Scalar GETMANT macro forms (non-__OPTIMIZE__ build).  */
#define _mm_getmant_sd(X, Y, C, D)					\
  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),	\
					    (__v2df)(__m128d)(Y),	\
					    (int)(((D)<<2) | (C)),	\
					    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, X, Y, C, D)				\
  ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),	\
						 (__v2df)(__m128d)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v2df)(__m128d)(W),	\
						 (__mmask8)(U),		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_sd(U, X, Y, C, D)				\
  ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),	\
						 (__v2df)(__m128d)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v2df)_mm_setzero_pd(), \
						 (__mmask8)(U),		\
						 _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D)					\
  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),		\
					   (__v4sf)(__m128)(Y),		\
					   (int)(((D)<<2) | (C)),	\
					   _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, X, Y, C, D)				\
  ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X),	\
						(__v4sf)(__m128)(Y),	\
						(int)(((D)<<2) | (C)),	\
						(__v4sf)(__m128)(W),	\
						(__mmask8)(U),		\
						_MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_ss(U, X, Y, C, D)				\
  ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X),	\
						(__v4sf)(__m128)(Y),	\
						(int)(((D)<<2) | (C)),	\
						(__v4sf)_mm_setzero_ps(), \
						(__mmask8)(U),		\
						_MM_FROUND_CUR_DIRECTION))
/* Scalar GETEXP macro forms (non-__OPTIMIZE__ build).  */
#define _mm_getexp_ss(A, B)						\
  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
					    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getexp_ss(W, U, A, B)					\
  (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,		\
					     _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getexp_ss(U, A, B)					\
  (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, \
					     _MM_FROUND_CUR_DIRECTION)

#define _mm_getexp_sd(A, B)						\
  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
					     _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getexp_sd(W, U, A, B)					\
  (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,		\
					      _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getexp_sd(U, A, B)					\
  (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, \
					      _MM_FROUND_CUR_DIRECTION)
/* vgetexpps on 16 floats; unmasked (all-ones mask, undefined
   pass-through vector).  */
#define _mm512_getexp_ps(A)						\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
  (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: lanes with a clear bit in U keep W's value.  */
#define _mm512_mask_getexp_ps(W, U, A)					\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
  (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: lanes with a clear bit in U are zeroed.  */
#define _mm512_maskz_getexp_ps(U, A)					\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
  (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

/* vgetexppd on 8 doubles; unmasked.  */
#define _mm512_getexp_pd(A)						\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
  (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

/* Merge-masked double variant.  */
#define _mm512_mask_getexp_pd(W, U, A)					\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
  (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

/* Zero-masked double variant.  */
#define _mm512_maskz_getexp_pd(U, A)					\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
  (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
14033 #endif
14035 #ifdef __OPTIMIZE__
14036 extern __inline __m512
14037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14038 _mm512_roundscale_ps (__m512 __A, const int __imm)
14040 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
14041 (__v16sf)
14042 _mm512_undefined_ps (),
14044 _MM_FROUND_CUR_DIRECTION);
14047 extern __inline __m512
14048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14049 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
14050 const int __imm)
14052 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
14053 (__v16sf) __A,
14054 (__mmask16) __B,
14055 _MM_FROUND_CUR_DIRECTION);
14058 extern __inline __m512
14059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14060 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
14062 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
14063 __imm,
14064 (__v16sf)
14065 _mm512_setzero_ps (),
14066 (__mmask16) __A,
14067 _MM_FROUND_CUR_DIRECTION);
14070 extern __inline __m512d
14071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14072 _mm512_roundscale_pd (__m512d __A, const int __imm)
14074 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
14075 (__v8df)
14076 _mm512_undefined_pd (),
14078 _MM_FROUND_CUR_DIRECTION);
14081 extern __inline __m512d
14082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14083 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
14084 const int __imm)
14086 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
14087 (__v8df) __A,
14088 (__mmask8) __B,
14089 _MM_FROUND_CUR_DIRECTION);
14092 extern __inline __m512d
14093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14094 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
14096 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
14097 __imm,
14098 (__v8df)
14099 _mm512_setzero_pd (),
14100 (__mmask8) __A,
14101 _MM_FROUND_CUR_DIRECTION);
14104 extern __inline __m128
14105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14106 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
14108 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
14109 (__v4sf) __B, __imm,
14110 _MM_FROUND_CUR_DIRECTION);
14113 extern __inline __m128d
14114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14115 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
14117 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
14118 (__v2df) __B, __imm,
14119 _MM_FROUND_CUR_DIRECTION);
14122 #else
/* Macro forms of the roundscale intrinsics for when __OPTIMIZE__ is not
   defined (the immediate must still be a compile-time constant).  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
/* Merge-masked: lanes with a clear bit in B keep A's value.  */
#define _mm512_mask_roundscale_ps(A, B, C, D)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
/* Zero-masked: lanes with a clear bit in A are zeroed.  */
#define _mm512_maskz_roundscale_ps(A, B, C)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(),\
					    (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
/* Double-precision counterparts.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(),\
					     (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
/* Scalar forms: round only the low element of B, upper from A.  */
#define _mm_roundscale_ss(A, B, C)					\
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
  (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C)					\
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
  (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
14155 #endif
14157 #ifdef __OPTIMIZE__
14158 extern __inline __mmask8
14159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14160 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
14162 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14163 (__v8df) __Y, __P,
14164 (__mmask8) -1,
14165 _MM_FROUND_CUR_DIRECTION);
14168 extern __inline __mmask16
14169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14170 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
14172 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14173 (__v16sf) __Y, __P,
14174 (__mmask16) -1,
14175 _MM_FROUND_CUR_DIRECTION);
14178 extern __inline __mmask16
14179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14180 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
14182 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14183 (__v16sf) __Y, __P,
14184 (__mmask16) __U,
14185 _MM_FROUND_CUR_DIRECTION);
14188 extern __inline __mmask8
14189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14190 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
14192 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14193 (__v8df) __Y, __P,
14194 (__mmask8) __U,
14195 _MM_FROUND_CUR_DIRECTION);
14198 extern __inline __mmask8
14199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14200 _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
14202 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14203 (__v8df) __Y, _CMP_EQ_OQ,
14204 (__mmask8) -1,
14205 _MM_FROUND_CUR_DIRECTION);
14208 extern __inline __mmask8
14209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14210 _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14212 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14213 (__v8df) __Y, _CMP_EQ_OQ,
14214 (__mmask8) __U,
14215 _MM_FROUND_CUR_DIRECTION);
14218 extern __inline __mmask8
14219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14220 _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
14222 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14223 (__v8df) __Y, _CMP_LT_OS,
14224 (__mmask8) -1,
14225 _MM_FROUND_CUR_DIRECTION);
14228 extern __inline __mmask8
14229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14230 _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14232 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14233 (__v8df) __Y, _CMP_LT_OS,
14234 (__mmask8) __U,
14235 _MM_FROUND_CUR_DIRECTION);
14238 extern __inline __mmask8
14239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14240 _mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
14242 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14243 (__v8df) __Y, _CMP_LE_OS,
14244 (__mmask8) -1,
14245 _MM_FROUND_CUR_DIRECTION);
14248 extern __inline __mmask8
14249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14250 _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14252 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14253 (__v8df) __Y, _CMP_LE_OS,
14254 (__mmask8) __U,
14255 _MM_FROUND_CUR_DIRECTION);
14258 extern __inline __mmask8
14259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14260 _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
14262 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14263 (__v8df) __Y, _CMP_UNORD_Q,
14264 (__mmask8) -1,
14265 _MM_FROUND_CUR_DIRECTION);
14268 extern __inline __mmask8
14269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14270 _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14272 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14273 (__v8df) __Y, _CMP_UNORD_Q,
14274 (__mmask8) __U,
14275 _MM_FROUND_CUR_DIRECTION);
14278 extern __inline __mmask8
14279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14280 _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
14282 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14283 (__v8df) __Y, _CMP_NEQ_UQ,
14284 (__mmask8) -1,
14285 _MM_FROUND_CUR_DIRECTION);
14288 extern __inline __mmask8
14289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14290 _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14292 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14293 (__v8df) __Y, _CMP_NEQ_UQ,
14294 (__mmask8) __U,
14295 _MM_FROUND_CUR_DIRECTION);
14298 extern __inline __mmask8
14299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14300 _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
14302 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14303 (__v8df) __Y, _CMP_NLT_US,
14304 (__mmask8) -1,
14305 _MM_FROUND_CUR_DIRECTION);
14308 extern __inline __mmask8
14309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14310 _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14312 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14313 (__v8df) __Y, _CMP_NLT_US,
14314 (__mmask8) __U,
14315 _MM_FROUND_CUR_DIRECTION);
14318 extern __inline __mmask8
14319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14320 _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
14322 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14323 (__v8df) __Y, _CMP_NLE_US,
14324 (__mmask8) -1,
14325 _MM_FROUND_CUR_DIRECTION);
14328 extern __inline __mmask8
14329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14330 _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14332 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14333 (__v8df) __Y, _CMP_NLE_US,
14334 (__mmask8) __U,
14335 _MM_FROUND_CUR_DIRECTION);
14338 extern __inline __mmask8
14339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14340 _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
14342 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14343 (__v8df) __Y, _CMP_ORD_Q,
14344 (__mmask8) -1,
14345 _MM_FROUND_CUR_DIRECTION);
14348 extern __inline __mmask8
14349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14350 _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
14352 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14353 (__v8df) __Y, _CMP_ORD_Q,
14354 (__mmask8) __U,
14355 _MM_FROUND_CUR_DIRECTION);
14358 extern __inline __mmask16
14359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14360 _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
14362 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14363 (__v16sf) __Y, _CMP_EQ_OQ,
14364 (__mmask16) -1,
14365 _MM_FROUND_CUR_DIRECTION);
14368 extern __inline __mmask16
14369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14370 _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14372 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14373 (__v16sf) __Y, _CMP_EQ_OQ,
14374 (__mmask16) __U,
14375 _MM_FROUND_CUR_DIRECTION);
14378 extern __inline __mmask16
14379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14380 _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
14382 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14383 (__v16sf) __Y, _CMP_LT_OS,
14384 (__mmask16) -1,
14385 _MM_FROUND_CUR_DIRECTION);
14388 extern __inline __mmask16
14389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14390 _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14392 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14393 (__v16sf) __Y, _CMP_LT_OS,
14394 (__mmask16) __U,
14395 _MM_FROUND_CUR_DIRECTION);
14398 extern __inline __mmask16
14399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14400 _mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
14402 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14403 (__v16sf) __Y, _CMP_LE_OS,
14404 (__mmask16) -1,
14405 _MM_FROUND_CUR_DIRECTION);
14408 extern __inline __mmask16
14409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14410 _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14412 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14413 (__v16sf) __Y, _CMP_LE_OS,
14414 (__mmask16) __U,
14415 _MM_FROUND_CUR_DIRECTION);
14418 extern __inline __mmask16
14419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14420 _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
14422 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14423 (__v16sf) __Y, _CMP_UNORD_Q,
14424 (__mmask16) -1,
14425 _MM_FROUND_CUR_DIRECTION);
14428 extern __inline __mmask16
14429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14430 _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14432 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14433 (__v16sf) __Y, _CMP_UNORD_Q,
14434 (__mmask16) __U,
14435 _MM_FROUND_CUR_DIRECTION);
14438 extern __inline __mmask16
14439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14440 _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
14442 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14443 (__v16sf) __Y, _CMP_NEQ_UQ,
14444 (__mmask16) -1,
14445 _MM_FROUND_CUR_DIRECTION);
14448 extern __inline __mmask16
14449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14450 _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14452 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14453 (__v16sf) __Y, _CMP_NEQ_UQ,
14454 (__mmask16) __U,
14455 _MM_FROUND_CUR_DIRECTION);
14458 extern __inline __mmask16
14459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14460 _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
14462 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14463 (__v16sf) __Y, _CMP_NLT_US,
14464 (__mmask16) -1,
14465 _MM_FROUND_CUR_DIRECTION);
14468 extern __inline __mmask16
14469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14470 _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14472 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14473 (__v16sf) __Y, _CMP_NLT_US,
14474 (__mmask16) __U,
14475 _MM_FROUND_CUR_DIRECTION);
14478 extern __inline __mmask16
14479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14480 _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
14482 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14483 (__v16sf) __Y, _CMP_NLE_US,
14484 (__mmask16) -1,
14485 _MM_FROUND_CUR_DIRECTION);
14488 extern __inline __mmask16
14489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14490 _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14492 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14493 (__v16sf) __Y, _CMP_NLE_US,
14494 (__mmask16) __U,
14495 _MM_FROUND_CUR_DIRECTION);
14498 extern __inline __mmask16
14499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14500 _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
14502 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14503 (__v16sf) __Y, _CMP_ORD_Q,
14504 (__mmask16) -1,
14505 _MM_FROUND_CUR_DIRECTION);
14508 extern __inline __mmask16
14509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14510 _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
14512 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14513 (__v16sf) __Y, _CMP_ORD_Q,
14514 (__mmask16) __U,
14515 _MM_FROUND_CUR_DIRECTION);
14518 extern __inline __mmask8
14519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14520 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
14522 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14523 (__v2df) __Y, __P,
14524 (__mmask8) -1,
14525 _MM_FROUND_CUR_DIRECTION);
14528 extern __inline __mmask8
14529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14530 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
14532 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14533 (__v2df) __Y, __P,
14534 (__mmask8) __M,
14535 _MM_FROUND_CUR_DIRECTION);
14538 extern __inline __mmask8
14539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14540 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
14542 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14543 (__v4sf) __Y, __P,
14544 (__mmask8) -1,
14545 _MM_FROUND_CUR_DIRECTION);
14548 extern __inline __mmask8
14549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14550 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
14552 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14553 (__v4sf) __Y, __P,
14554 (__mmask8) __M,
14555 _MM_FROUND_CUR_DIRECTION);
14558 #else
/* Macro forms of the compare intrinsics for when __OPTIMIZE__ is not
   defined.  Fix: the mask argument M is now cast AND parenthesized
   ((__mmask8)(M) instead of (__mmask8)M or bare M), matching how every
   other macro argument in this header is protected, so expression
   arguments such as m1 | m2 expand correctly.  */
#define _mm512_cmp_pd_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y), (int)(P),\
					    (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P)					\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y), (int)(P),\
					     (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y), (int)(P),\
					    (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P)				\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y), (int)(P),\
					     (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_sd_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
					 (__v2df)(__m128d)(Y), (int)(P),\
					 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
					 (__v2df)(__m128d)(Y), (int)(P),\
					 (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_ss_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y), (int)(P),	\
					 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
					 (__v4sf)(__m128)(Y), (int)(P),	\
					 (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
14598 #endif
14600 extern __inline __mmask16
14601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14602 _mm512_kmov (__mmask16 __A)
14604 return __builtin_ia32_kmovw (__A);
14607 extern __inline __m512
14608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14609 _mm512_castpd_ps (__m512d __A)
14611 return (__m512) (__A);
14614 extern __inline __m512i
14615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14616 _mm512_castpd_si512 (__m512d __A)
14618 return (__m512i) (__A);
14621 extern __inline __m512d
14622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14623 _mm512_castps_pd (__m512 __A)
14625 return (__m512d) (__A);
14628 extern __inline __m512i
14629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14630 _mm512_castps_si512 (__m512 __A)
14632 return (__m512i) (__A);
14635 extern __inline __m512
14636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14637 _mm512_castsi512_ps (__m512i __A)
14639 return (__m512) (__A);
14642 extern __inline __m512d
14643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14644 _mm512_castsi512_pd (__m512i __A)
14646 return (__m512d) (__A);
14649 extern __inline __m128d
14650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14651 _mm512_castpd512_pd128 (__m512d __A)
14653 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
14656 extern __inline __m128
14657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14658 _mm512_castps512_ps128 (__m512 __A)
14660 return _mm512_extractf32x4_ps(__A, 0);
14663 extern __inline __m128i
14664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14665 _mm512_castsi512_si128 (__m512i __A)
14667 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
14670 extern __inline __m256d
14671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14672 _mm512_castpd512_pd256 (__m512d __A)
14674 return _mm512_extractf64x4_pd(__A, 0);
14677 extern __inline __m256
14678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14679 _mm512_castps512_ps256 (__m512 __A)
14681 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
14684 extern __inline __m256i
14685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14686 _mm512_castsi512_si256 (__m512i __A)
14688 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
14691 extern __inline __m512d
14692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14693 _mm512_castpd128_pd512 (__m128d __A)
14695 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
14698 extern __inline __m512
14699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14700 _mm512_castps128_ps512 (__m128 __A)
14702 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
14705 extern __inline __m512i
14706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14707 _mm512_castsi128_si512 (__m128i __A)
14709 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
14712 extern __inline __m512d
14713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14714 _mm512_castpd256_pd512 (__m256d __A)
14716 return __builtin_ia32_pd512_256pd (__A);
14719 extern __inline __m512
14720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14721 _mm512_castps256_ps512 (__m256 __A)
14723 return __builtin_ia32_ps512_256ps (__A);
14726 extern __inline __m512i
14727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14728 _mm512_castsi256_si512 (__m256i __A)
14730 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
14733 extern __inline __mmask16
14734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14735 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
14737 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14738 (__v16si) __B, 0,
14739 (__mmask16) -1);
14742 extern __inline __mmask16
14743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14744 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14746 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14747 (__v16si) __B, 0, __U);
14750 extern __inline __mmask8
14751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14752 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14754 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14755 (__v8di) __B, 0, __U);
14758 extern __inline __mmask8
14759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14760 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
14762 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14763 (__v8di) __B, 0,
14764 (__mmask8) -1);
14767 extern __inline __mmask16
14768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14769 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
14771 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14772 (__v16si) __B, 6,
14773 (__mmask16) -1);
14776 extern __inline __mmask16
14777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14778 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14780 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14781 (__v16si) __B, 6, __U);
14784 extern __inline __mmask8
14785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14786 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14788 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14789 (__v8di) __B, 6, __U);
14792 extern __inline __mmask8
14793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14794 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
14796 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14797 (__v8di) __B, 6,
14798 (__mmask8) -1);
#undef __MM512_REDUCE_OP
/* Reduce the sixteen 32-bit ints of __A with the infix operator `op':
   fold 512->256 and 256->128 via extracts, then combine the last four
   elements pairwise (shuffle { 2, 3, 0, 1 }, then the final two).
   Expands inside a function body and ends with a `return'.  */
#define __MM512_REDUCE_OP(op) \
  __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1);		\
  __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0);		\
  __m256i __T3 = (__m256i) (__T1 op __T2);				\
  __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1);		\
  __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0);		\
  __v4si __T6 = __T4 op __T5;						\
  __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });	\
  __v4si __T8 = __T6 op __T7;						\
  return __T8[0] op __T8[1]
14813 extern __inline int
14814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14815 _mm512_reduce_add_epi32 (__m512i __A)
14817 __MM512_REDUCE_OP (+);
14820 extern __inline int
14821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14822 _mm512_reduce_mul_epi32 (__m512i __A)
14824 __MM512_REDUCE_OP (*);
14827 extern __inline int
14828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14829 _mm512_reduce_and_epi32 (__m512i __A)
14831 __MM512_REDUCE_OP (&);
14834 extern __inline int
14835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14836 _mm512_reduce_or_epi32 (__m512i __A)
14838 __MM512_REDUCE_OP (|);
14841 extern __inline int
14842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14843 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
14845 __A = _mm512_maskz_mov_epi32 (__U, __A);
14846 __MM512_REDUCE_OP (+);
14849 extern __inline int
14850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14851 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
14853 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
14854 __MM512_REDUCE_OP (*);
14857 extern __inline int
14858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14859 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
14861 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14862 __MM512_REDUCE_OP (&);
14865 extern __inline int
14866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14867 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
14869 __A = _mm512_maskz_mov_epi32 (__U, __A);
14870 __MM512_REDUCE_OP (|);
#undef __MM512_REDUCE_OP
/* Same reduction shape as above, but `op' is an intrinsic suffix
   (e.g. min_epi32) pasted onto _mm256_/_mm_ rather than an infix
   operator, so two extra shuffle/combine steps bring the result down
   to element 0.  */
#define __MM512_REDUCE_OP(op) \
  __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1);		\
  __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0);		\
  __m256i __T3 = _mm256_##op (__T1, __T2);				\
  __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1);		\
  __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0);		\
  __m128i __T6 = _mm_##op (__T4, __T5);					\
  __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6,		\
					      (__v4si) { 2, 3, 0, 1 }); \
  __m128i __T8 = _mm_##op (__T6, __T7);					\
  __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8,		\
					      (__v4si) { 1, 0, 1, 0 }); \
  __v4si __T10 = (__v4si) _mm_##op (__T8, __T9);			\
  return __T10[0]
14889 extern __inline int
14890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14891 _mm512_reduce_min_epi32 (__m512i __A)
14893 __MM512_REDUCE_OP (min_epi32);
14896 extern __inline int
14897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14898 _mm512_reduce_max_epi32 (__m512i __A)
14900 __MM512_REDUCE_OP (max_epi32);
14903 extern __inline unsigned int
14904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14905 _mm512_reduce_min_epu32 (__m512i __A)
14907 __MM512_REDUCE_OP (min_epu32);
14910 extern __inline unsigned int
14911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14912 _mm512_reduce_max_epu32 (__m512i __A)
14914 __MM512_REDUCE_OP (max_epu32);
14917 extern __inline int
14918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14919 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
14921 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
14922 __MM512_REDUCE_OP (min_epi32);
14925 extern __inline int
14926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14927 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
14929 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
14930 __MM512_REDUCE_OP (max_epi32);
14933 extern __inline unsigned int
14934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14935 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
14937 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14938 __MM512_REDUCE_OP (min_epu32);
14941 extern __inline unsigned int
14942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14943 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
14945 __A = _mm512_maskz_mov_epi32 (__U, __A);
14946 __MM512_REDUCE_OP (max_epu32);
#undef __MM512_REDUCE_OP
/* Float variant of the infix reduction: the 512->256 split reuses the
   double-precision extract and reinterprets the halves as __m256.  */
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
  __m256 __T3 = __T1 op __T2;						\
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1);			\
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0);			\
  __m128 __T6 = __T4 op __T5;						\
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });	\
  __m128 __T8 = __T6 op __T7;						\
  return __T8[0] op __T8[1]
14961 extern __inline float
14962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14963 _mm512_reduce_add_ps (__m512 __A)
14965 __MM512_REDUCE_OP (+);
14968 extern __inline float
14969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14970 _mm512_reduce_mul_ps (__m512 __A)
14972 __MM512_REDUCE_OP (*);
14975 extern __inline float
14976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14977 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
14979 __A = _mm512_maskz_mov_ps (__U, __A);
14980 __MM512_REDUCE_OP (+);
14983 extern __inline float
14984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14985 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
14987 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
14988 __MM512_REDUCE_OP (*);
14991 #undef __MM512_REDUCE_OP
14992 #define __MM512_REDUCE_OP(op) \
14993 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14994 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14995 __m256 __T3 = _mm256_##op (__T1, __T2); \
14996 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14997 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14998 __m128 __T6 = _mm_##op (__T4, __T5); \
14999 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
15000 __m128 __T8 = _mm_##op (__T6, __T7); \
15001 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
15002 __m128 __T10 = _mm_##op (__T8, __T9); \
15003 return __T10[0]
15005 extern __inline float
15006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15007 _mm512_reduce_min_ps (__m512 __A)
15009 __MM512_REDUCE_OP (min_ps);
15012 extern __inline float
15013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15014 _mm512_reduce_max_ps (__m512 __A)
15016 __MM512_REDUCE_OP (max_ps);
15019 extern __inline float
15020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15021 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
15023 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
15024 __MM512_REDUCE_OP (min_ps);
15027 extern __inline float
15028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15029 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
15031 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
15032 __MM512_REDUCE_OP (max_ps);
15035 #undef __MM512_REDUCE_OP
15036 #define __MM512_REDUCE_OP(op) \
15037 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
15038 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
15039 __m256i __T3 = (__m256i) (__T1 op __T2); \
15040 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
15041 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
15042 __v2di __T6 = __T4 op __T5; \
15043 return __T6[0] op __T6[1]
15045 extern __inline long long
15046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15047 _mm512_reduce_add_epi64 (__m512i __A)
15049 __MM512_REDUCE_OP (+);
15052 extern __inline long long
15053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15054 _mm512_reduce_mul_epi64 (__m512i __A)
15056 __MM512_REDUCE_OP (*);
15059 extern __inline long long
15060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15061 _mm512_reduce_and_epi64 (__m512i __A)
15063 __MM512_REDUCE_OP (&);
15066 extern __inline long long
15067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15068 _mm512_reduce_or_epi64 (__m512i __A)
15070 __MM512_REDUCE_OP (|);
15073 extern __inline long long
15074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15075 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
15077 __A = _mm512_maskz_mov_epi64 (__U, __A);
15078 __MM512_REDUCE_OP (+);
15081 extern __inline long long
15082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15083 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
15085 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
15086 __MM512_REDUCE_OP (*);
15089 extern __inline long long
15090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15091 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
15093 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15094 __MM512_REDUCE_OP (&);
15097 extern __inline long long
15098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15099 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
15101 __A = _mm512_maskz_mov_epi64 (__U, __A);
15102 __MM512_REDUCE_OP (|);
15105 #undef __MM512_REDUCE_OP
15106 #define __MM512_REDUCE_OP(op) \
15107 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
15108 __m512i __T2 = _mm512_##op (__A, __T1); \
15109 __m512i __T3 \
15110 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
15111 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
15112 __m512i __T4 = _mm512_##op (__T2, __T3); \
15113 __m512i __T5 \
15114 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
15115 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
15116 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
15117 return __T6[0]
15119 extern __inline long long
15120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15121 _mm512_reduce_min_epi64 (__m512i __A)
15123 __MM512_REDUCE_OP (min_epi64);
15126 extern __inline long long
15127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15128 _mm512_reduce_max_epi64 (__m512i __A)
15130 __MM512_REDUCE_OP (max_epi64);
15133 extern __inline long long
15134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15135 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
15137 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
15138 __U, __A);
15139 __MM512_REDUCE_OP (min_epi64);
15142 extern __inline long long
15143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15144 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
15146 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
15147 __U, __A);
15148 __MM512_REDUCE_OP (max_epi64);
15151 extern __inline unsigned long long
15152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15153 _mm512_reduce_min_epu64 (__m512i __A)
15155 __MM512_REDUCE_OP (min_epu64);
15158 extern __inline unsigned long long
15159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15160 _mm512_reduce_max_epu64 (__m512i __A)
15162 __MM512_REDUCE_OP (max_epu64);
15165 extern __inline unsigned long long
15166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15167 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
15169 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
15170 __MM512_REDUCE_OP (min_epu64);
15173 extern __inline unsigned long long
15174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15175 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
15177 __A = _mm512_maskz_mov_epi64 (__U, __A);
15178 __MM512_REDUCE_OP (max_epu64);
15181 #undef __MM512_REDUCE_OP
15182 #define __MM512_REDUCE_OP(op) \
15183 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
15184 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
15185 __m256d __T3 = __T1 op __T2; \
15186 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
15187 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
15188 __m128d __T6 = __T4 op __T5; \
15189 return __T6[0] op __T6[1]
15191 extern __inline double
15192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15193 _mm512_reduce_add_pd (__m512d __A)
15195 __MM512_REDUCE_OP (+);
15198 extern __inline double
15199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15200 _mm512_reduce_mul_pd (__m512d __A)
15202 __MM512_REDUCE_OP (*);
15205 extern __inline double
15206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15207 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
15209 __A = _mm512_maskz_mov_pd (__U, __A);
15210 __MM512_REDUCE_OP (+);
15213 extern __inline double
15214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15215 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
15217 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
15218 __MM512_REDUCE_OP (*);
15221 #undef __MM512_REDUCE_OP
15222 #define __MM512_REDUCE_OP(op) \
15223 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
15224 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
15225 __m256d __T3 = _mm256_##op (__T1, __T2); \
15226 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
15227 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
15228 __m128d __T6 = _mm_##op (__T4, __T5); \
15229 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
15230 __m128d __T8 = _mm_##op (__T6, __T7); \
15231 return __T8[0]
15233 extern __inline double
15234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15235 _mm512_reduce_min_pd (__m512d __A)
15237 __MM512_REDUCE_OP (min_pd);
15240 extern __inline double
15241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15242 _mm512_reduce_max_pd (__m512d __A)
15244 __MM512_REDUCE_OP (max_pd);
15247 extern __inline double
15248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15249 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
15251 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
15252 __MM512_REDUCE_OP (min_pd);
15255 extern __inline double
15256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15257 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
15259 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
15260 __MM512_REDUCE_OP (max_pd);
15263 #undef __MM512_REDUCE_OP
15265 #ifdef __DISABLE_AVX512F__
15266 #undef __DISABLE_AVX512F__
15267 #pragma GCC pop_options
15268 #endif /* __DISABLE_AVX512F__ */
15270 #endif /* _AVX512FINTRIN_H_INCLUDED */