Import GCC-8 to a new vendor branch
[dragonfly.git] contrib/gcc-8.0/gcc/config/i386/avx512fintrin.h
/* Copyright (C) 2013-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED

#ifndef __AVX512F__
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */

/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
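
/* Illustrative note: each bit of an __mmask8/__mmask16 selects the
   corresponding vector element, with bit 0 controlling element 0 (the
   least significant lane).  For example, a __mmask16 of 0x00ff selects
   the low eight elements of a 16-element vector.  */
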
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_int2mask (int __M)
{
  return (__mmask16) __M;
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2int (__mmask16 __M)
{
  return (int) __M;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
		  long long __D, long long __E, long long __F,
		  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
	 { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H I J K L M N O P].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
		  int __E, int __F, int __G, int __H,
		  int __I, int __J, int __K, int __L,
		  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
	 { __P, __O, __N, __M, __L, __K, __J, __I,
	   __H, __G, __F, __E, __D, __C, __B, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
	       double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
	 { __H, __G, __F, __E, __D, __C, __B, __A };
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
	       float __E, float __F, float __G, float __H,
	       float __I, float __J, float __K, float __L,
	       float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
	 { __P, __O, __N, __M, __L, __K, __J, __I,
	   __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			\
			  e8,e9,e10,e11,e12,e13,e14,e15)		\
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
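
/* Illustrative note on element ordering: the _mm512_set_* intrinsics take
   their arguments from the highest element down to the lowest, while the
   _mm512_setr_* macros take them in memory ("reversed") order.  Example:

     __m512i __a = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
				      8, 9, 10, 11, 12, 13, 14, 15);
     __m512i __b = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
				     7, 6, 5, 4, 3, 2, 1, 0);

   __a and __b hold the same value; element 0 (the lowest lane) is 0 in
   both.  */
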
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ps (void)
{
  __m512 __Y = __Y;
  return __Y;
}

#define _mm512_undefined _mm512_undefined_ps

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_pd (void)
{
  __m512d __Y = __Y;
  return __Y;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_epi32 (void)
{
  __m512i __Y = __Y;
  return __Y;
}

#define _mm512_undefined_si512 _mm512_undefined_epi32
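
/* In the _mm512_undefined_* helpers above, the self-initialization
   "__Y = __Y" is deliberate: it yields a vector whose contents are left
   unspecified, and within this header that value is only used as a
   don't-care pass-through operand for masked builtins whose result lanes
   are all overwritten.  */
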
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
  return __extension__ (__m512i)(__v64qi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
  return __extension__ (__m512i)(__v32hi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
						  (__v2df) { __A, },
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
  return (__m512) __builtin_ia32_broadcastss512 (__extension__
						 (__v4sf) { __A, },
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1);
}

/* Create the vector [A B C D A B C D A B C D A B C D].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
		   long long __D)
{
  return __extension__ (__m512i) (__v8di)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps(e3,e2,e1,e0)

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U);
}
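
/* Illustrative note on masking, using the move intrinsics above: for each
   element i, _mm512_mask_mov_pd (__W, __U, __A) yields __A[i] when bit i
   of __U is set and __W[i] otherwise, while _mm512_maskz_mov_pd (__U, __A)
   yields __A[i] when bit i of __U is set and 0.0 otherwise.  For example,

     __m512d __r = _mm512_maskz_mov_pd ((__mmask8) 0x0f, __A);

   copies the low four doubles of __A and zeroes the high four.  */
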
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
  return *(__m512d *) __P;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df) __W,
						   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
  *(__m512d *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
				   (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
  return *(__m512 *) __P;
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
  *(__m512 *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
				   (__mmask16) __U);
}
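
/* Illustrative note: the _mm512_load_* and _mm512_store_* intrinsics above
   perform aligned accesses; the address is assumed to be 64-byte aligned
   (the natural alignment of __m512 / __m512d).  A minimal sketch of an
   aligned buffer:

     double __attribute__ ((aligned (64))) __buf[8];
     _mm512_store_pd (__buf, _mm512_setzero_pd ());
     __m512d __v = _mm512_load_pd (__buf);

   For unaligned data, use the corresponding loadu/storeu intrinsics or the
   __m512*_u types declared earlier.  */
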
407 extern __inline __m512i
408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
416 extern __inline __m512i
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_load_epi64 (void const *__P)
430 return *(__m512i *) __P;
433 extern __inline __m512i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
442 extern __inline __m512i
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
452 extern __inline void
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm512_store_epi64 (void *__P, __m512i __A)
456 *(__m512i *) __P = __A;
459 extern __inline void
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
467 extern __inline __m512i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
476 extern __inline __m512i
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_load_si512 (void const *__P)
490 return *(__m512i *) __P;
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_load_epi32 (void const *__P)
497 return *(__m512i *) __P;
500 extern __inline __m512i
501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
509 extern __inline __m512i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
519 extern __inline void
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm512_store_si512 (void *__P, __m512i __A)
523 *(__m512i *) __P = __A;
526 extern __inline void
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm512_store_epi32 (void *__P, __m512i __A)
530 *(__m512i *) __P = __A;
533 extern __inline void
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
541 extern __inline __m512i
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
545 return (__m512i) ((__v16su) __A * (__v16su) __B);
548 extern __inline __m512i
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
559 extern __inline __m512i
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
575 _mm512_undefined_epi32 (),
576 (__mmask16) -1);
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
589 extern __inline __m512i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
607 _mm512_undefined_epi32 (),
608 (__mmask16) -1);
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
621 extern __inline __m512i
622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
639 _mm512_undefined_epi32 (),
640 (__mmask16) -1);
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
653 extern __inline __m512i
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
664 extern __inline __m512i
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm512_add_epi64 (__m512i __A, __m512i __B)
668 return (__m512i) ((__v8du) __A + (__v8du) __B);
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
692 extern __inline __m512i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm512_sub_epi64 (__m512i __A, __m512i __B)
696 return (__m512i) ((__v8du) __A - (__v8du) __B);
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
727 _mm512_undefined_pd (),
728 (__mmask8) -1);
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
759 _mm512_undefined_epi32 (),
760 (__mmask8) -1);
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
791 _mm512_undefined_epi32 (),
792 (__mmask8) -1);
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_add_epi32 (__m512i __A, __m512i __B)
820 return (__m512i) ((__v16su) __A + (__v16su) __B);
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
833 extern __inline __m512i
834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
851 _mm512_undefined_epi32 (),
852 (__mmask8) -1);
855 extern __inline __m512i
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
875 extern __inline __m512i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm512_sub_epi32 (__m512i __A, __m512i __B)
879 return (__m512i) ((__v16su) __A - (__v16su) __B);
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
892 extern __inline __m512i
893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
903 extern __inline __m512i
904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
910 _mm512_undefined_epi32 (),
911 (__mmask8) -1);
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
934 #ifdef __OPTIMIZE__
935 extern __inline __m512i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
939 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940 (__v8di)
941 _mm512_undefined_epi32 (),
942 (__mmask8) -1);
945 extern __inline __m512i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
948 unsigned int __B)
950 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
951 (__v8di) __W,
952 (__mmask8) __U);
955 extern __inline __m512i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
959 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
960 (__v8di)
961 _mm512_setzero_si512 (),
962 (__mmask8) __U);
964 #else
965 #define _mm512_slli_epi64(X, C) \
966 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
967 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
968 (__mmask8)-1))
970 #define _mm512_mask_slli_epi64(W, U, X, C) \
971 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
972 (__v8di)(__m512i)(W),\
973 (__mmask8)(U)))
975 #define _mm512_maskz_slli_epi64(U, X, C) \
976 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
977 (__v8di)(__m512i)_mm512_setzero_si512 (),\
978 (__mmask8)(U)))
979 #endif
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_sll_epi64 (__m512i __A, __m128i __B)
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
988 _mm512_undefined_epi32 (),
989 (__mmask8) -1);
992 extern __inline __m512i
993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1002 extern __inline __m512i
1003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
1013 #ifdef __OPTIMIZE__
1014 extern __inline __m512i
1015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
1018 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1019 (__v8di)
1020 _mm512_undefined_epi32 (),
1021 (__mmask8) -1);
1024 extern __inline __m512i
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1027 __m512i __A, unsigned int __B)
1029 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1030 (__v8di) __W,
1031 (__mmask8) __U);
1034 extern __inline __m512i
1035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1038 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1039 (__v8di)
1040 _mm512_setzero_si512 (),
1041 (__mmask8) __U);
1043 #else
1044 #define _mm512_srli_epi64(X, C) \
1045 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1046 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
1047 (__mmask8)-1))
1049 #define _mm512_mask_srli_epi64(W, U, X, C) \
1050 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1051 (__v8di)(__m512i)(W),\
1052 (__mmask8)(U)))
1054 #define _mm512_maskz_srli_epi64(U, X, C) \
1055 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1056 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1057 (__mmask8)(U)))
1058 #endif
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
1067 _mm512_undefined_epi32 (),
1068 (__mmask8) -1);
1071 extern __inline __m512i
1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1081 extern __inline __m512i
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
1092 #ifdef __OPTIMIZE__
1093 extern __inline __m512i
1094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1095 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1097 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1098 (__v8di)
1099 _mm512_undefined_epi32 (),
1100 (__mmask8) -1);
1103 extern __inline __m512i
1104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1106 unsigned int __B)
1108 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1109 (__v8di) __W,
1110 (__mmask8) __U);
1113 extern __inline __m512i
1114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1115 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1117 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1118 (__v8di)
1119 _mm512_setzero_si512 (),
1120 (__mmask8) __U);
1122 #else
1123 #define _mm512_srai_epi64(X, C) \
1124 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1125 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
1126 (__mmask8)-1))
1128 #define _mm512_mask_srai_epi64(W, U, X, C) \
1129 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1130 (__v8di)(__m512i)(W),\
1131 (__mmask8)(U)))
1133 #define _mm512_maskz_srai_epi64(U, X, C) \
1134 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1135 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1136 (__mmask8)(U)))
1137 #endif
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
1146 _mm512_undefined_epi32 (),
1147 (__mmask8) -1);
1150 extern __inline __m512i
1151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1160 extern __inline __m512i
1161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
1171 #ifdef __OPTIMIZE__
1172 extern __inline __m512i
1173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1176 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1177 (__v16si)
1178 _mm512_undefined_epi32 (),
1179 (__mmask16) -1);
1182 extern __inline __m512i
1183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1184 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1185 unsigned int __B)
1187 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1188 (__v16si) __W,
1189 (__mmask16) __U);
1192 extern __inline __m512i
1193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1194 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1196 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1197 (__v16si)
1198 _mm512_setzero_si512 (),
1199 (__mmask16) __U);
1201 #else
1202 #define _mm512_slli_epi32(X, C) \
1203 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1204 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1205 (__mmask16)-1))
1207 #define _mm512_mask_slli_epi32(W, U, X, C) \
1208 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1209 (__v16si)(__m512i)(W),\
1210 (__mmask16)(U)))
1212 #define _mm512_maskz_slli_epi32(U, X, C) \
1213 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1214 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1215 (__mmask16)(U)))
1216 #endif
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
1225 _mm512_undefined_epi32 (),
1226 (__mmask16) -1);
1229 extern __inline __m512i
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1239 extern __inline __m512i
1240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
1250 #ifdef __OPTIMIZE__
1251 extern __inline __m512i
1252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1253 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1255 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1256 (__v16si)
1257 _mm512_undefined_epi32 (),
1258 (__mmask16) -1);
1261 extern __inline __m512i
1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1264 __m512i __A, unsigned int __B)
1266 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1267 (__v16si) __W,
1268 (__mmask16) __U);
1271 extern __inline __m512i
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1275 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1276 (__v16si)
1277 _mm512_setzero_si512 (),
1278 (__mmask16) __U);
1280 #else
1281 #define _mm512_srli_epi32(X, C) \
1282 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1283 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1284 (__mmask16)-1))
1286 #define _mm512_mask_srli_epi32(W, U, X, C) \
1287 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1288 (__v16si)(__m512i)(W),\
1289 (__mmask16)(U)))
1291 #define _mm512_maskz_srli_epi32(U, X, C) \
1292 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1293 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1294 (__mmask16)(U)))
1295 #endif
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
1304 _mm512_undefined_epi32 (),
1305 (__mmask16) -1);
1308 extern __inline __m512i
1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1318 extern __inline __m512i
1319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
1329 #ifdef __OPTIMIZE__
1330 extern __inline __m512i
1331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1334 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1335 (__v16si)
1336 _mm512_undefined_epi32 (),
1337 (__mmask16) -1);
1340 extern __inline __m512i
1341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1343 unsigned int __B)
1345 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1346 (__v16si) __W,
1347 (__mmask16) __U);
1350 extern __inline __m512i
1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1354 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1355 (__v16si)
1356 _mm512_setzero_si512 (),
1357 (__mmask16) __U);
1359 #else
1360 #define _mm512_srai_epi32(X, C) \
1361 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1362 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1363 (__mmask16)-1))
1365 #define _mm512_mask_srai_epi32(W, U, X, C) \
1366 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1367 (__v16si)(__m512i)(W),\
1368 (__mmask16)(U)))
1370 #define _mm512_maskz_srai_epi32(U, X, C) \
1371 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1372 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1373 (__mmask16)(U)))
1374 #endif
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
1383 _mm512_undefined_epi32 (),
1384 (__mmask16) -1);
1387 extern __inline __m512i
1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1397 extern __inline __m512i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
1408 #ifdef __OPTIMIZE__
1409 extern __inline __m128d
1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1411 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1413 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1414 (__v2df) __B,
1415 __R);
1418 extern __inline __m128d
1419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1420 _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1421 __m128d __B, const int __R)
1423 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1424 (__v2df) __B,
1425 (__v2df) __W,
1426 (__mmask8) __U, __R);
1429 extern __inline __m128d
1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431 _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1432 const int __R)
1434 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
1435 (__v2df) __B,
1436 (__v2df)
1437 _mm_setzero_pd (),
1438 (__mmask8) __U, __R);
1441 extern __inline __m128
1442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1445 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1446 (__v4sf) __B,
1447 __R);
1450 extern __inline __m128
1451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1452 _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1453 __m128 __B, const int __R)
1455 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1456 (__v4sf) __B,
1457 (__v4sf) __W,
1458 (__mmask8) __U, __R);
1461 extern __inline __m128
1462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1463 _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1464 const int __R)
1466 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
1467 (__v4sf) __B,
1468 (__v4sf)
1469 _mm_setzero_ps (),
1470 (__mmask8) __U, __R);
1473 extern __inline __m128d
1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1477 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1478 (__v2df) __B,
1479 __R);
1482 extern __inline __m128d
1483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484 _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1485 __m128d __B, const int __R)
1487 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1488 (__v2df) __B,
1489 (__v2df) __W,
1490 (__mmask8) __U, __R);
1493 extern __inline __m128d
1494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495 _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1496 const int __R)
1498 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
1499 (__v2df) __B,
1500 (__v2df)
1501 _mm_setzero_pd (),
1502 (__mmask8) __U, __R);
1505 extern __inline __m128
1506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1507 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1509 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1510 (__v4sf) __B,
1511 __R);
1514 extern __inline __m128
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1517 __m128 __B, const int __R)
1519 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1520 (__v4sf) __B,
1521 (__v4sf) __W,
1522 (__mmask8) __U, __R);
1525 extern __inline __m128
1526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1527 _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1528 const int __R)
1530 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
1531 (__v4sf) __B,
1532 (__v4sf)
1533 _mm_setzero_ps (),
1534 (__mmask8) __U, __R);
1537 #else
1538 #define _mm_add_round_sd(A, B, C) \
1539 (__m128d)__builtin_ia32_addsd_round(A, B, C)
1541 #define _mm_mask_add_round_sd(W, U, A, B, C) \
1542 (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
1544 #define _mm_maskz_add_round_sd(U, A, B, C) \
1545 (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
1547 #define _mm_add_round_ss(A, B, C) \
1548 (__m128)__builtin_ia32_addss_round(A, B, C)
1550 #define _mm_mask_add_round_ss(W, U, A, B, C) \
1551 (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
1553 #define _mm_maskz_add_round_ss(U, A, B, C) \
1554 (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
1556 #define _mm_sub_round_sd(A, B, C) \
1557 (__m128d)__builtin_ia32_subsd_round(A, B, C)
1559 #define _mm_mask_sub_round_sd(W, U, A, B, C) \
1560 (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
1562 #define _mm_maskz_sub_round_sd(U, A, B, C) \
1563 (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
1565 #define _mm_sub_round_ss(A, B, C) \
1566 (__m128)__builtin_ia32_subss_round(A, B, C)
1568 #define _mm_mask_sub_round_ss(W, U, A, B, C) \
1569 (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
1571 #define _mm_maskz_sub_round_ss(U, A, B, C) \
1572 (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
1574 #endif
1576 #ifdef __OPTIMIZE__
1577 extern __inline __m512i
1578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
1580 const int __imm)
1582 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1583 (__v8di) __B,
1584 (__v8di) __C, __imm,
1585 (__mmask8) -1);
1588 extern __inline __m512i
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1591 __m512i __C, const int __imm)
1593 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1594 (__v8di) __B,
1595 (__v8di) __C, __imm,
1596 (__mmask8) __U);
1599 extern __inline __m512i
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1602 __m512i __C, const int __imm)
1604 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1605 (__v8di) __B,
1606 (__v8di) __C,
1607 __imm, (__mmask8) __U);
1610 extern __inline __m512i
1611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1612 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
1613 const int __imm)
1615 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1616 (__v16si) __B,
1617 (__v16si) __C,
1618 __imm, (__mmask16) -1);
1621 extern __inline __m512i
1622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1623 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1624 __m512i __C, const int __imm)
1626 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1627 (__v16si) __B,
1628 (__v16si) __C,
1629 __imm, (__mmask16) __U);
1632 extern __inline __m512i
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1635 __m512i __C, const int __imm)
1637 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1638 (__v16si) __B,
1639 (__v16si) __C,
1640 __imm, (__mmask16) __U);
1642 #else
1643 #define _mm512_ternarylogic_epi64(A, B, C, I) \
1644 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1645 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
1646 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
1647 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1648 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1649 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
1650 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
1651 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1652 #define _mm512_ternarylogic_epi32(A, B, C, I) \
1653 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1654 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1655 (__mmask16)-1))
1656 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
1657 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1658 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1659 (__mmask16)(U)))
1660 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
1661 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
1662 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1663 (__mmask16)(U)))
1664 #endif
1666 extern __inline __m512d
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm512_rcp14_pd (__m512d __A)
1670 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1671 (__v8df)
1672 _mm512_undefined_pd (),
1673 (__mmask8) -1);
1676 extern __inline __m512d
1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681 (__v8df) __W,
1682 (__mmask8) __U);
1685 extern __inline __m512d
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1689 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1690 (__v8df)
1691 _mm512_setzero_pd (),
1692 (__mmask8) __U);
1695 extern __inline __m512
1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697 _mm512_rcp14_ps (__m512 __A)
1699 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1700 (__v16sf)
1701 _mm512_undefined_ps (),
1702 (__mmask16) -1);
1705 extern __inline __m512
1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710 (__v16sf) __W,
1711 (__mmask16) __U);
1714 extern __inline __m512
1715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719 (__v16sf)
1720 _mm512_setzero_ps (),
1721 (__mmask16) __U);
1724 extern __inline __m128d
1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726 _mm_rcp14_sd (__m128d __A, __m128d __B)
1728 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1729 (__v2df) __A);
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1736 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1737 (__v2df) __A,
1738 (__v2df) __W,
1739 (__mmask8) __U);
1742 extern __inline __m128d
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1746 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1747 (__v2df) __A,
1748 (__v2df) _mm_setzero_ps (),
1749 (__mmask8) __U);
1752 extern __inline __m128
1753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754 _mm_rcp14_ss (__m128 __A, __m128 __B)
1756 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1757 (__v4sf) __A);
1760 extern __inline __m128
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1764 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1765 (__v4sf) __A,
1766 (__v4sf) __W,
1767 (__mmask8) __U);
1770 extern __inline __m128
1771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1774 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1775 (__v4sf) __A,
1776 (__v4sf) _mm_setzero_ps (),
1777 (__mmask8) __U);
1780 extern __inline __m512d
1781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782 _mm512_rsqrt14_pd (__m512d __A)
1784 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1785 (__v8df)
1786 _mm512_undefined_pd (),
1787 (__mmask8) -1);
1790 extern __inline __m512d
1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1794 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1795 (__v8df) __W,
1796 (__mmask8) __U);
1799 extern __inline __m512d
1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1803 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1804 (__v8df)
1805 _mm512_setzero_pd (),
1806 (__mmask8) __U);
1809 extern __inline __m512
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _mm512_rsqrt14_ps (__m512 __A)
1813 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1814 (__v16sf)
1815 _mm512_undefined_ps (),
1816 (__mmask16) -1);
1819 extern __inline __m512
1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1823 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1824 (__v16sf) __W,
1825 (__mmask16) __U);
1828 extern __inline __m512
1829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1830 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1832 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1833 (__v16sf)
1834 _mm512_setzero_ps (),
1835 (__mmask16) __U);
1838 extern __inline __m128d
1839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1840 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1842 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1843 (__v2df) __A);
1846 extern __inline __m128d
1847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1850 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1851 (__v2df) __A,
1852 (__v2df) __W,
1853 (__mmask8) __U);
1856 extern __inline __m128d
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1860 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1861 (__v2df) __A,
1862 (__v2df) _mm_setzero_pd (),
1863 (__mmask8) __U);
1866 extern __inline __m128
1867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1870 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1871 (__v4sf) __A);
1874 extern __inline __m128
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1878 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1879 (__v4sf) __A,
1880 (__v4sf) __W,
1881 (__mmask8) __U);
1884 extern __inline __m128
1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1888 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1889 (__v4sf) __A,
1890 (__v4sf) _mm_setzero_ps (),
1891 (__mmask8) __U);
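/* Note: the VRSQRT14 intrinsics above compute an approximate reciprocal
   square root, again with a maximum relative error of 2^-14; the masking
   and scalar _sd/_ss conventions match the VRCP14 group above.  */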
1894 #ifdef __OPTIMIZE__
1895 extern __inline __m512d
1896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1897 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1899 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1900 (__v8df)
1901 _mm512_undefined_pd (),
1902 (__mmask8) -1, __R);
1905 extern __inline __m512d
1906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1907 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1908 const int __R)
1910 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1911 (__v8df) __W,
1912 (__mmask8) __U, __R);
1915 extern __inline __m512d
1916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1917 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1919 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1920 (__v8df)
1921 _mm512_setzero_pd (),
1922 (__mmask8) __U, __R);
1925 extern __inline __m512
1926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1927 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1929 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1930 (__v16sf)
1931 _mm512_undefined_ps (),
1932 (__mmask16) -1, __R);
1935 extern __inline __m512
1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1939 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1940 (__v16sf) __W,
1941 (__mmask16) __U, __R);
1944 extern __inline __m512
1945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1948 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1949 (__v16sf)
1950 _mm512_setzero_ps (),
1951 (__mmask16) __U, __R);
1954 extern __inline __m128d
1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1958 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
1959 (__v2df) __A,
1960 (__v2df)
1961 _mm_setzero_pd (),
1962 (__mmask8) -1, __R);
1965 extern __inline __m128d
1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1967 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
1968 const int __R)
1970 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
1971 (__v2df) __A,
1972 (__v2df) __W,
1973 (__mmask8) __U, __R);
1976 extern __inline __m128d
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
1980 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
1981 (__v2df) __A,
1982 (__v2df)
1983 _mm_setzero_pd (),
1984 (__mmask8) __U, __R);
1987 extern __inline __m128
1988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1989 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1991 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
1992 (__v4sf) __A,
1993 (__v4sf)
1994 _mm_setzero_ps (),
1995 (__mmask8) -1, __R);
1998 extern __inline __m128
1999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2000 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2001 const int __R)
2003 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2004 (__v4sf) __A,
2005 (__v4sf) __W,
2006 (__mmask8) __U, __R);
2009 extern __inline __m128
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
2013 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
2014 (__v4sf) __A,
2015 (__v4sf)
2016 _mm_setzero_ps (),
2017 (__mmask8) __U, __R);
2019 #else
2020 #define _mm512_sqrt_round_pd(A, C) \
2021 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
2023 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
2024 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
2026 #define _mm512_maskz_sqrt_round_pd(U, A, C) \
2027 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
2029 #define _mm512_sqrt_round_ps(A, C) \
2030 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
2032 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
2033 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
2035 #define _mm512_maskz_sqrt_round_ps(U, A, C) \
2036 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
2038 #define _mm_sqrt_round_sd(A, B, C) \
2039 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
2040 (__v2df) _mm_setzero_pd (), -1, C)
2042 #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
2043 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
2045 #define _mm_maskz_sqrt_round_sd(U, A, B, C) \
2046 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
2047 (__v2df) _mm_setzero_pd (), U, C)
2049 #define _mm_sqrt_round_ss(A, B, C) \
2050 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
2051 (__v4sf) _mm_setzero_ps (), -1, C)
2053 #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
2054 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
2056 #define _mm_maskz_sqrt_round_ss(U, A, B, C) \
2057 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
2058 (__v4sf) _mm_setzero_ps (), U, C)
2059 #endif
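/* Note: the *_round_* intrinsics take an immediate __R assembled from the
   _MM_FROUND_* constants; the macro forms in the #else branch above exist
   because, when not optimizing, the inline functions cannot guarantee that
   __R reaches the builtin as a compile-time constant.  Illustrative sketch
   only (variable names are placeholders, not part of this header):

     __m512d x = _mm512_set1_pd (2.0);
     __m512d r = _mm512_sqrt_round_pd (x, _MM_FROUND_TO_NEAREST_INT
                                          | _MM_FROUND_NO_EXC);  */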
2061 extern __inline __m512i
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm512_cvtepi8_epi32 (__m128i __A)
2065 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2066 (__v16si)
2067 _mm512_undefined_epi32 (),
2068 (__mmask16) -1);
2071 extern __inline __m512i
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2075 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2076 (__v16si) __W,
2077 (__mmask16) __U);
2080 extern __inline __m512i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2084 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2085 (__v16si)
2086 _mm512_setzero_si512 (),
2087 (__mmask16) __U);
2090 extern __inline __m512i
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm512_cvtepi8_epi64 (__m128i __A)
2094 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2095 (__v8di)
2096 _mm512_undefined_epi32 (),
2097 (__mmask8) -1);
2100 extern __inline __m512i
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2104 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2105 (__v8di) __W,
2106 (__mmask8) __U);
2109 extern __inline __m512i
2110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2113 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2114 (__v8di)
2115 _mm512_setzero_si512 (),
2116 (__mmask8) __U);
2119 extern __inline __m512i
2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121 _mm512_cvtepi16_epi32 (__m256i __A)
2123 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2124 (__v16si)
2125 _mm512_undefined_epi32 (),
2126 (__mmask16) -1);
2129 extern __inline __m512i
2130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2131 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2133 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2134 (__v16si) __W,
2135 (__mmask16) __U);
2138 extern __inline __m512i
2139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2140 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2142 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2143 (__v16si)
2144 _mm512_setzero_si512 (),
2145 (__mmask16) __U);
2148 extern __inline __m512i
2149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2150 _mm512_cvtepi16_epi64 (__m128i __A)
2152 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2153 (__v8di)
2154 _mm512_undefined_epi32 (),
2155 (__mmask8) -1);
2158 extern __inline __m512i
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2162 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2163 (__v8di) __W,
2164 (__mmask8) __U);
2167 extern __inline __m512i
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2171 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2172 (__v8di)
2173 _mm512_setzero_si512 (),
2174 (__mmask8) __U);
2177 extern __inline __m512i
2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 _mm512_cvtepi32_epi64 (__m256i __X)
2181 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2182 (__v8di)
2183 _mm512_undefined_epi32 (),
2184 (__mmask8) -1);
2187 extern __inline __m512i
2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2191 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2192 (__v8di) __W,
2193 (__mmask8) __U);
2196 extern __inline __m512i
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2200 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2201 (__v8di)
2202 _mm512_setzero_si512 (),
2203 (__mmask8) __U);
2206 extern __inline __m512i
2207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2208 _mm512_cvtepu8_epi32 (__m128i __A)
2210 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2211 (__v16si)
2212 _mm512_undefined_epi32 (),
2213 (__mmask16) -1);
2216 extern __inline __m512i
2217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2218 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2220 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2221 (__v16si) __W,
2222 (__mmask16) __U);
2225 extern __inline __m512i
2226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2227 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2229 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2230 (__v16si)
2231 _mm512_setzero_si512 (),
2232 (__mmask16) __U);
2235 extern __inline __m512i
2236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2237 _mm512_cvtepu8_epi64 (__m128i __A)
2239 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2240 (__v8di)
2241 _mm512_undefined_epi32 (),
2242 (__mmask8) -1);
2245 extern __inline __m512i
2246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2247 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2249 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2250 (__v8di) __W,
2251 (__mmask8) __U);
2254 extern __inline __m512i
2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2256 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2258 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2259 (__v8di)
2260 _mm512_setzero_si512 (),
2261 (__mmask8) __U);
2264 extern __inline __m512i
2265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2266 _mm512_cvtepu16_epi32 (__m256i __A)
2268 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2269 (__v16si)
2270 _mm512_undefined_epi32 (),
2271 (__mmask16) -1);
2274 extern __inline __m512i
2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2278 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2279 (__v16si) __W,
2280 (__mmask16) __U);
2283 extern __inline __m512i
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2287 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2288 (__v16si)
2289 _mm512_setzero_si512 (),
2290 (__mmask16) __U);
2293 extern __inline __m512i
2294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2295 _mm512_cvtepu16_epi64 (__m128i __A)
2297 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2298 (__v8di)
2299 _mm512_undefined_epi32 (),
2300 (__mmask8) -1);
2303 extern __inline __m512i
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2307 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2308 (__v8di) __W,
2309 (__mmask8) __U);
2312 extern __inline __m512i
2313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2314 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2316 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2317 (__v8di)
2318 _mm512_setzero_si512 (),
2319 (__mmask8) __U);
2322 extern __inline __m512i
2323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324 _mm512_cvtepu32_epi64 (__m256i __X)
2326 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2327 (__v8di)
2328 _mm512_undefined_epi32 (),
2329 (__mmask8) -1);
2332 extern __inline __m512i
2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2336 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2337 (__v8di) __W,
2338 (__mmask8) __U);
2341 extern __inline __m512i
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2345 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2346 (__v8di)
2347 _mm512_setzero_si512 (),
2348 (__mmask8) __U);
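/* Note: the _mm512_cvtepi*_epi{32,64} intrinsics above sign-extend 8-, 16-
   or 32-bit source elements to wider destination elements, while the
   _mm512_cvtepu* intrinsics zero-extend them; the masked and zero-masked
   forms follow the usual __W/__U conventions.  */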
2351 #ifdef __OPTIMIZE__
2352 extern __inline __m512d
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2356 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2357 (__v8df) __B,
2358 (__v8df)
2359 _mm512_undefined_pd (),
2360 (__mmask8) -1, __R);
2363 extern __inline __m512d
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2366 __m512d __B, const int __R)
2368 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2369 (__v8df) __B,
2370 (__v8df) __W,
2371 (__mmask8) __U, __R);
2374 extern __inline __m512d
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2377 const int __R)
2379 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2380 (__v8df) __B,
2381 (__v8df)
2382 _mm512_setzero_pd (),
2383 (__mmask8) __U, __R);
2386 extern __inline __m512
2387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2388 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2390 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2391 (__v16sf) __B,
2392 (__v16sf)
2393 _mm512_undefined_ps (),
2394 (__mmask16) -1, __R);
2397 extern __inline __m512
2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2400 __m512 __B, const int __R)
2402 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2403 (__v16sf) __B,
2404 (__v16sf) __W,
2405 (__mmask16) __U, __R);
2408 extern __inline __m512
2409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2410 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2412 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2413 (__v16sf) __B,
2414 (__v16sf)
2415 _mm512_setzero_ps (),
2416 (__mmask16) __U, __R);
2419 extern __inline __m512d
2420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2421 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2423 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2424 (__v8df) __B,
2425 (__v8df)
2426 _mm512_undefined_pd (),
2427 (__mmask8) -1, __R);
2430 extern __inline __m512d
2431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2432 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2433 __m512d __B, const int __R)
2435 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2436 (__v8df) __B,
2437 (__v8df) __W,
2438 (__mmask8) __U, __R);
2441 extern __inline __m512d
2442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2443 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2444 const int __R)
2446 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2447 (__v8df) __B,
2448 (__v8df)
2449 _mm512_setzero_pd (),
2450 (__mmask8) __U, __R);
2453 extern __inline __m512
2454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2455 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2457 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2458 (__v16sf) __B,
2459 (__v16sf)
2460 _mm512_undefined_ps (),
2461 (__mmask16) -1, __R);
2464 extern __inline __m512
2465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2466 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2467 __m512 __B, const int __R)
2469 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2470 (__v16sf) __B,
2471 (__v16sf) __W,
2472 (__mmask16) __U, __R);
2475 extern __inline __m512
2476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2477 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2479 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2480 (__v16sf) __B,
2481 (__v16sf)
2482 _mm512_setzero_ps (),
2483 (__mmask16) __U, __R);
2485 #else
2486 #define _mm512_add_round_pd(A, B, C) \
2487 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2489 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2490 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2492 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2493 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2495 #define _mm512_add_round_ps(A, B, C) \
2496 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2498 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2499 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2501 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2502 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2504 #define _mm512_sub_round_pd(A, B, C) \
2505 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2507 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2508 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2510 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2511 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2513 #define _mm512_sub_round_ps(A, B, C) \
2514 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2516 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2517 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2519 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2520 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2521 #endif
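/* Note: _mm512_add_round_* and _mm512_sub_round_* above follow the same
   masking and rounding conventions as the sqrt group.  Illustrative sketch
   only (placeholder names): a masked addition rounded toward zero could be
   written as

     __m512 sum = _mm512_mask_add_round_ps (src, m, a, b,
                                            _MM_FROUND_TO_ZERO
                                            | _MM_FROUND_NO_EXC);  */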
2523 #ifdef __OPTIMIZE__
2524 extern __inline __m512d
2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2526 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2528 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2529 (__v8df) __B,
2530 (__v8df)
2531 _mm512_undefined_pd (),
2532 (__mmask8) -1, __R);
2535 extern __inline __m512d
2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2537 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2538 __m512d __B, const int __R)
2540 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2541 (__v8df) __B,
2542 (__v8df) __W,
2543 (__mmask8) __U, __R);
2546 extern __inline __m512d
2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2548 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2549 const int __R)
2551 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2552 (__v8df) __B,
2553 (__v8df)
2554 _mm512_setzero_pd (),
2555 (__mmask8) __U, __R);
2558 extern __inline __m512
2559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2560 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2562 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2563 (__v16sf) __B,
2564 (__v16sf)
2565 _mm512_undefined_ps (),
2566 (__mmask16) -1, __R);
2569 extern __inline __m512
2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2572 __m512 __B, const int __R)
2574 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2575 (__v16sf) __B,
2576 (__v16sf) __W,
2577 (__mmask16) __U, __R);
2580 extern __inline __m512
2581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2582 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2584 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2585 (__v16sf) __B,
2586 (__v16sf)
2587 _mm512_setzero_ps (),
2588 (__mmask16) __U, __R);
2591 extern __inline __m512d
2592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2593 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2595 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2596 (__v8df) __V,
2597 (__v8df)
2598 _mm512_undefined_pd (),
2599 (__mmask8) -1, __R);
2602 extern __inline __m512d
2603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2604 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2605 __m512d __V, const int __R)
2607 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2608 (__v8df) __V,
2609 (__v8df) __W,
2610 (__mmask8) __U, __R);
2613 extern __inline __m512d
2614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2615 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2616 const int __R)
2618 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2619 (__v8df) __V,
2620 (__v8df)
2621 _mm512_setzero_pd (),
2622 (__mmask8) __U, __R);
2625 extern __inline __m512
2626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2627 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2629 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2630 (__v16sf) __B,
2631 (__v16sf)
2632 _mm512_undefined_ps (),
2633 (__mmask16) -1, __R);
2636 extern __inline __m512
2637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2638 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2639 __m512 __B, const int __R)
2641 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2642 (__v16sf) __B,
2643 (__v16sf) __W,
2644 (__mmask16) __U, __R);
2647 extern __inline __m512
2648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2649 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2651 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2652 (__v16sf) __B,
2653 (__v16sf)
2654 _mm512_setzero_ps (),
2655 (__mmask16) __U, __R);
2658 extern __inline __m128d
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2662 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2663 (__v2df) __B,
2664 __R);
2667 extern __inline __m128d
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2670 __m128d __B, const int __R)
2672 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2673 (__v2df) __B,
2674 (__v2df) __W,
2675 (__mmask8) __U, __R);
2678 extern __inline __m128d
2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2681 const int __R)
2683 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2684 (__v2df) __B,
2685 (__v2df)
2686 _mm_setzero_pd (),
2687 (__mmask8) __U, __R);
2690 extern __inline __m128
2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2694 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2695 (__v4sf) __B,
2696 __R);
2699 extern __inline __m128
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2702 __m128 __B, const int __R)
2704 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2705 (__v4sf) __B,
2706 (__v4sf) __W,
2707 (__mmask8) __U, __R);
2710 extern __inline __m128
2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2713 const int __R)
2715 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2716 (__v4sf) __B,
2717 (__v4sf)
2718 _mm_setzero_ps (),
2719 (__mmask8) __U, __R);
2722 extern __inline __m128d
2723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2724 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2726 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2727 (__v2df) __B,
2728 __R);
2731 extern __inline __m128d
2732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2733 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
2734 __m128d __B, const int __R)
2736 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2737 (__v2df) __B,
2738 (__v2df) __W,
2739 (__mmask8) __U, __R);
2742 extern __inline __m128d
2743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2744 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
2745 const int __R)
2747 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2748 (__v2df) __B,
2749 (__v2df)
2750 _mm_setzero_pd (),
2751 (__mmask8) __U, __R);
2754 extern __inline __m128
2755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2756 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2758 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2759 (__v4sf) __B,
2760 __R);
2763 extern __inline __m128
2764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2765 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2766 __m128 __B, const int __R)
2768 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2769 (__v4sf) __B,
2770 (__v4sf) __W,
2771 (__mmask8) __U, __R);
2774 extern __inline __m128
2775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2776 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2777 const int __R)
2779 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2780 (__v4sf) __B,
2781 (__v4sf)
2782 _mm_setzero_ps (),
2783 (__mmask8) __U, __R);
2786 #else
2787 #define _mm512_mul_round_pd(A, B, C) \
2788 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2790 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2791 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2793 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2794 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2796 #define _mm512_mul_round_ps(A, B, C) \
2797 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2799 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2800 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2802 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2803 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2805 #define _mm512_div_round_pd(A, B, C) \
2806 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2808 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2809 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2811 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2812 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2814 #define _mm512_div_round_ps(A, B, C) \
2815 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2817 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2818 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2820 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2821 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2823 #define _mm_mul_round_sd(A, B, C) \
2824 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2826 #define _mm_mask_mul_round_sd(W, U, A, B, C) \
2827 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
2829 #define _mm_maskz_mul_round_sd(U, A, B, C) \
2830 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
2832 #define _mm_mul_round_ss(A, B, C) \
2833 (__m128)__builtin_ia32_mulss_round(A, B, C)
2835 #define _mm_mask_mul_round_ss(W, U, A, B, C) \
2836 (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
2838 #define _mm_maskz_mul_round_ss(U, A, B, C) \
2839 (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
2841 #define _mm_div_round_sd(A, B, C) \
2842 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2844 #define _mm_mask_div_round_sd(W, U, A, B, C) \
2845 (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
2847 #define _mm_maskz_div_round_sd(U, A, B, C) \
2848 (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
2850 #define _mm_div_round_ss(A, B, C) \
2851 (__m128)__builtin_ia32_divss_round(A, B, C)
2853 #define _mm_mask_div_round_ss(W, U, A, B, C) \
2854 (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
2856 #define _mm_maskz_div_round_ss(U, A, B, C) \
2857 (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
2859 #endif
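/* Note: for the scalar _mm_mul_round_s{d,s} and _mm_div_round_s{d,s} forms
   above, only the low element is computed from __A and __B; the upper
   elements of the result are copied from __A.  The masked and zero-masked
   variants use the *_mask_round builtins with __W or a zero vector as the
   pass-through operand.  */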
2861 #ifdef __OPTIMIZE__
2862 extern __inline __m512d
2863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2866 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2867 (__v8df) __B,
2868 (__v8df)
2869 _mm512_undefined_pd (),
2870 (__mmask8) -1, __R);
2873 extern __inline __m512d
2874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2875 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2876 __m512d __B, const int __R)
2878 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2879 (__v8df) __B,
2880 (__v8df) __W,
2881 (__mmask8) __U, __R);
2884 extern __inline __m512d
2885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2886 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2887 const int __R)
2889 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2890 (__v8df) __B,
2891 (__v8df)
2892 _mm512_setzero_pd (),
2893 (__mmask8) __U, __R);
2896 extern __inline __m512
2897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2898 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2900 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2901 (__v16sf) __B,
2902 (__v16sf)
2903 _mm512_undefined_ps (),
2904 (__mmask16) -1, __R);
2907 extern __inline __m512
2908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2909 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2910 __m512 __B, const int __R)
2912 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2913 (__v16sf) __B,
2914 (__v16sf) __W,
2915 (__mmask16) __U, __R);
2918 extern __inline __m512
2919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2920 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2922 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2923 (__v16sf) __B,
2924 (__v16sf)
2925 _mm512_setzero_ps (),
2926 (__mmask16) __U, __R);
2929 extern __inline __m512d
2930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2931 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2933 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2934 (__v8df) __B,
2935 (__v8df)
2936 _mm512_undefined_pd (),
2937 (__mmask8) -1, __R);
2940 extern __inline __m512d
2941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2942 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2943 __m512d __B, const int __R)
2945 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2946 (__v8df) __B,
2947 (__v8df) __W,
2948 (__mmask8) __U, __R);
2951 extern __inline __m512d
2952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2953 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2954 const int __R)
2956 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2957 (__v8df) __B,
2958 (__v8df)
2959 _mm512_setzero_pd (),
2960 (__mmask8) __U, __R);
2963 extern __inline __m512
2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2965 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2967 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2968 (__v16sf) __B,
2969 (__v16sf)
2970 _mm512_undefined_ps (),
2971 (__mmask16) -1, __R);
2974 extern __inline __m512
2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2977 __m512 __B, const int __R)
2979 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2980 (__v16sf) __B,
2981 (__v16sf) __W,
2982 (__mmask16) __U, __R);
2985 extern __inline __m512
2986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2989 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2990 (__v16sf) __B,
2991 (__v16sf)
2992 _mm512_setzero_ps (),
2993 (__mmask16) __U, __R);
2995 #else
2996 #define _mm512_max_round_pd(A, B, R) \
2997 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2999 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
3000 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
3002 #define _mm512_maskz_max_round_pd(U, A, B, R) \
3003 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
3005 #define _mm512_max_round_ps(A, B, R) \
3006 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
3008 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
3009 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
3011 #define _mm512_maskz_max_round_ps(U, A, B, R) \
3012 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
3014 #define _mm512_min_round_pd(A, B, R) \
3015 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
3017 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
3018 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
3020 #define _mm512_maskz_min_round_pd(U, A, B, R) \
3021 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
3023 #define _mm512_min_round_ps(A, B, R) \
3024 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
3026 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
3027 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
3029 #define _mm512_maskz_min_round_ps(U, A, B, R) \
3030 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
3031 #endif
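/* Note: maximum and minimum do not round, so for the
   _mm512_{max,min}_round_* intrinsics above the __R argument acts as an
   SAE (suppress-all-exceptions) control; _MM_FROUND_CUR_DIRECTION or
   _MM_FROUND_NO_EXC are the values normally passed.  */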
3033 #ifdef __OPTIMIZE__
3034 extern __inline __m512d
3035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3036 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
3038 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3039 (__v8df) __B,
3040 (__v8df)
3041 _mm512_undefined_pd (),
3042 (__mmask8) -1, __R);
3045 extern __inline __m512d
3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
3048 __m512d __B, const int __R)
3050 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3051 (__v8df) __B,
3052 (__v8df) __W,
3053 (__mmask8) __U, __R);
3056 extern __inline __m512d
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3059 const int __R)
3061 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
3062 (__v8df) __B,
3063 (__v8df)
3064 _mm512_setzero_pd (),
3065 (__mmask8) __U, __R);
3068 extern __inline __m512
3069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3070 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
3072 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3073 (__v16sf) __B,
3074 (__v16sf)
3075 _mm512_undefined_ps (),
3076 (__mmask16) -1, __R);
3079 extern __inline __m512
3080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
3082 __m512 __B, const int __R)
3084 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3085 (__v16sf) __B,
3086 (__v16sf) __W,
3087 (__mmask16) __U, __R);
3090 extern __inline __m512
3091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3093 const int __R)
3095 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
3096 (__v16sf) __B,
3097 (__v16sf)
3098 _mm512_setzero_ps (),
3099 (__mmask16) __U, __R);
3102 extern __inline __m128d
3103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3104 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
3106 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3107 (__v2df) __B,
3108 (__v2df)
3109 _mm_setzero_pd (),
3110 (__mmask8) -1, __R);
3113 extern __inline __m128d
3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3115 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3116 const int __R)
3118 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3119 (__v2df) __B,
3120 (__v2df) __W,
3121 (__mmask8) __U, __R);
3124 extern __inline __m128d
3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3126 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
3127 const int __R)
3129 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3130 (__v2df) __B,
3131 (__v2df)
3132 _mm_setzero_pd (),
3133 (__mmask8) __U, __R);
3136 extern __inline __m128
3137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
3140 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3141 (__v4sf) __B,
3142 (__v4sf)
3143 _mm_setzero_ps (),
3144 (__mmask8) -1, __R);
3147 extern __inline __m128
3148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3150 const int __R)
3152 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3153 (__v4sf) __B,
3154 (__v4sf) __W,
3155 (__mmask8) __U, __R);
3158 extern __inline __m128
3159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3160 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
3162 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3163 (__v4sf) __B,
3164 (__v4sf)
3165 _mm_setzero_ps (),
3166 (__mmask8) __U, __R);
3168 #else
3169 #define _mm512_scalef_round_pd(A, B, C) \
3170 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
3172 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
3173 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
3175 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
3176 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
3178 #define _mm512_scalef_round_ps(A, B, C) \
3179 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
3181 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
3182 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
3184 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
3185 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
3187 #define _mm_scalef_round_sd(A, B, C) \
3188 (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
3189 (__v2df)_mm_setzero_pd (), -1, C)
3191 #define _mm_scalef_round_ss(A, B, C) \
3192 (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
3193 (__v4sf)_mm_setzero_ps (), -1, C)
3194 #endif
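/* Note: VSCALEF scales each element of __A by a power of two taken from
   the corresponding element of __B, roughly __A * 2**floor(__B), with the
   usual masking and rounding conventions.  */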
3196 #ifdef __OPTIMIZE__
3197 extern __inline __m512d
3198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3199 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3201 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3202 (__v8df) __B,
3203 (__v8df) __C,
3204 (__mmask8) -1, __R);
3207 extern __inline __m512d
3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3210 __m512d __C, const int __R)
3212 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3213 (__v8df) __B,
3214 (__v8df) __C,
3215 (__mmask8) __U, __R);
3218 extern __inline __m512d
3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3221 __mmask8 __U, const int __R)
3223 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3224 (__v8df) __B,
3225 (__v8df) __C,
3226 (__mmask8) __U, __R);
3229 extern __inline __m512d
3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3231 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3232 __m512d __C, const int __R)
3234 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3235 (__v8df) __B,
3236 (__v8df) __C,
3237 (__mmask8) __U, __R);
3240 extern __inline __m512
3241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3242 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3244 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3245 (__v16sf) __B,
3246 (__v16sf) __C,
3247 (__mmask16) -1, __R);
3250 extern __inline __m512
3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3252 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3253 __m512 __C, const int __R)
3255 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3256 (__v16sf) __B,
3257 (__v16sf) __C,
3258 (__mmask16) __U, __R);
3261 extern __inline __m512
3262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3263 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3264 __mmask16 __U, const int __R)
3266 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3267 (__v16sf) __B,
3268 (__v16sf) __C,
3269 (__mmask16) __U, __R);
3272 extern __inline __m512
3273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3274 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3275 __m512 __C, const int __R)
3277 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3278 (__v16sf) __B,
3279 (__v16sf) __C,
3280 (__mmask16) __U, __R);
3283 extern __inline __m512d
3284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3285 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3287 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3288 (__v8df) __B,
3289 -(__v8df) __C,
3290 (__mmask8) -1, __R);
3293 extern __inline __m512d
3294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3295 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3296 __m512d __C, const int __R)
3298 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3299 (__v8df) __B,
3300 -(__v8df) __C,
3301 (__mmask8) __U, __R);
3304 extern __inline __m512d
3305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3306 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3307 __mmask8 __U, const int __R)
3309 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3310 (__v8df) __B,
3311 (__v8df) __C,
3312 (__mmask8) __U, __R);
3315 extern __inline __m512d
3316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3317 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3318 __m512d __C, const int __R)
3320 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3321 (__v8df) __B,
3322 -(__v8df) __C,
3323 (__mmask8) __U, __R);
3326 extern __inline __m512
3327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3328 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3330 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3331 (__v16sf) __B,
3332 -(__v16sf) __C,
3333 (__mmask16) -1, __R);
3336 extern __inline __m512
3337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3338 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3339 __m512 __C, const int __R)
3341 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3342 (__v16sf) __B,
3343 -(__v16sf) __C,
3344 (__mmask16) __U, __R);
3347 extern __inline __m512
3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3350 __mmask16 __U, const int __R)
3352 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3353 (__v16sf) __B,
3354 (__v16sf) __C,
3355 (__mmask16) __U, __R);
3358 extern __inline __m512
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3361 __m512 __C, const int __R)
3363 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3364 (__v16sf) __B,
3365 -(__v16sf) __C,
3366 (__mmask16) __U, __R);
3369 extern __inline __m512d
3370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3373 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3374 (__v8df) __B,
3375 (__v8df) __C,
3376 (__mmask8) -1, __R);
3379 extern __inline __m512d
3380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3382 __m512d __C, const int __R)
3384 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3385 (__v8df) __B,
3386 (__v8df) __C,
3387 (__mmask8) __U, __R);
3390 extern __inline __m512d
3391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3392 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3393 __mmask8 __U, const int __R)
3395 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3396 (__v8df) __B,
3397 (__v8df) __C,
3398 (__mmask8) __U, __R);
3401 extern __inline __m512d
3402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3403 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3404 __m512d __C, const int __R)
3406 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3407 (__v8df) __B,
3408 (__v8df) __C,
3409 (__mmask8) __U, __R);
3412 extern __inline __m512
3413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3414 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3416 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3417 (__v16sf) __B,
3418 (__v16sf) __C,
3419 (__mmask16) -1, __R);
3422 extern __inline __m512
3423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3424 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3425 __m512 __C, const int __R)
3427 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3428 (__v16sf) __B,
3429 (__v16sf) __C,
3430 (__mmask16) __U, __R);
3433 extern __inline __m512
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3436 __mmask16 __U, const int __R)
3438 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3439 (__v16sf) __B,
3440 (__v16sf) __C,
3441 (__mmask16) __U, __R);
3444 extern __inline __m512
3445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3446 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3447 __m512 __C, const int __R)
3449 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3450 (__v16sf) __B,
3451 (__v16sf) __C,
3452 (__mmask16) __U, __R);
3455 extern __inline __m512d
3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3459 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3460 (__v8df) __B,
3461 -(__v8df) __C,
3462 (__mmask8) -1, __R);
3465 extern __inline __m512d
3466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3467 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3468 __m512d __C, const int __R)
3470 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3471 (__v8df) __B,
3472 -(__v8df) __C,
3473 (__mmask8) __U, __R);
3476 extern __inline __m512d
3477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3478 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3479 __mmask8 __U, const int __R)
3481 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3482 (__v8df) __B,
3483 (__v8df) __C,
3484 (__mmask8) __U, __R);
3487 extern __inline __m512d
3488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3489 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3490 __m512d __C, const int __R)
3492 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3493 (__v8df) __B,
3494 -(__v8df) __C,
3495 (__mmask8) __U, __R);
3498 extern __inline __m512
3499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3500 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3502 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3503 (__v16sf) __B,
3504 -(__v16sf) __C,
3505 (__mmask16) -1, __R);
3508 extern __inline __m512
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3511 __m512 __C, const int __R)
3513 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3514 (__v16sf) __B,
3515 -(__v16sf) __C,
3516 (__mmask16) __U, __R);
3519 extern __inline __m512
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3522 __mmask16 __U, const int __R)
3524 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3525 (__v16sf) __B,
3526 (__v16sf) __C,
3527 (__mmask16) __U, __R);
3530 extern __inline __m512
3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3533 __m512 __C, const int __R)
3535 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3536 (__v16sf) __B,
3537 -(__v16sf) __C,
3538 (__mmask16) __U, __R);
3541 extern __inline __m512d
3542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3543 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3545 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3546 (__v8df) __B,
3547 (__v8df) __C,
3548 (__mmask8) -1, __R);
3551 extern __inline __m512d
3552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3553 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3554 __m512d __C, const int __R)
3556 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3557 (__v8df) __B,
3558 (__v8df) __C,
3559 (__mmask8) __U, __R);
3562 extern __inline __m512d
3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3564 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3565 __mmask8 __U, const int __R)
3567 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3568 (__v8df) __B,
3569 (__v8df) __C,
3570 (__mmask8) __U, __R);
3573 extern __inline __m512d
3574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3575 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3576 __m512d __C, const int __R)
3578 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3579 (__v8df) __B,
3580 (__v8df) __C,
3581 (__mmask8) __U, __R);
3584 extern __inline __m512
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3588 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3589 (__v16sf) __B,
3590 (__v16sf) __C,
3591 (__mmask16) -1, __R);
3594 extern __inline __m512
3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3596 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3597 __m512 __C, const int __R)
3599 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3600 (__v16sf) __B,
3601 (__v16sf) __C,
3602 (__mmask16) __U, __R);
3605 extern __inline __m512
3606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3607 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3608 __mmask16 __U, const int __R)
3610 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3611 (__v16sf) __B,
3612 (__v16sf) __C,
3613 (__mmask16) __U, __R);
3616 extern __inline __m512
3617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3618 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3619 __m512 __C, const int __R)
3621 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3622 (__v16sf) __B,
3623 (__v16sf) __C,
3624 (__mmask16) __U, __R);
3627 extern __inline __m512d
3628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3629 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3631 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3632 (__v8df) __B,
3633 -(__v8df) __C,
3634 (__mmask8) -1, __R);
3637 extern __inline __m512d
3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3640 __m512d __C, const int __R)
3642 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3643 (__v8df) __B,
3644 (__v8df) __C,
3645 (__mmask8) __U, __R);
3648 extern __inline __m512d
3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3651 __mmask8 __U, const int __R)
3653 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3654 (__v8df) __B,
3655 (__v8df) __C,
3656 (__mmask8) __U, __R);
3659 extern __inline __m512d
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3662 __m512d __C, const int __R)
3664 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3665 (__v8df) __B,
3666 -(__v8df) __C,
3667 (__mmask8) __U, __R);
3670 extern __inline __m512
3671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3674 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3675 (__v16sf) __B,
3676 -(__v16sf) __C,
3677 (__mmask16) -1, __R);
3680 extern __inline __m512
3681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3682 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3683 __m512 __C, const int __R)
3685 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3686 (__v16sf) __B,
3687 (__v16sf) __C,
3688 (__mmask16) __U, __R);
3691 extern __inline __m512
3692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3693 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3694 __mmask16 __U, const int __R)
3696 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3697 (__v16sf) __B,
3698 (__v16sf) __C,
3699 (__mmask16) __U, __R);
3702 extern __inline __m512
3703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3704 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3705 __m512 __C, const int __R)
3707 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3708 (__v16sf) __B,
3709 -(__v16sf) __C,
3710 (__mmask16) __U, __R);
3712 #else
3713 #define _mm512_fmadd_round_pd(A, B, C, R) \
3714 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3716 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3717 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3719 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3720 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3722 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3723 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3725 #define _mm512_fmadd_round_ps(A, B, C, R) \
3726 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3728 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3729 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3731 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3732 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3734 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3735 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3737 #define _mm512_fmsub_round_pd(A, B, C, R) \
3738 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3740 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3741 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3743 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3744 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3746 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3747 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3749 #define _mm512_fmsub_round_ps(A, B, C, R) \
3750 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3752 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3753 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3755 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3756 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3758 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3759 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3761 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3762 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3764 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3765 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3767 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3768 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3770 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3771 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3773 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3774 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3776 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3777 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3779 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3780 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3782 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3783 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3785 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3786 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3788 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3789 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3791 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3792 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3794 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3795 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3797 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3798 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3800 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3801 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3803 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3804 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3806 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3807 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3809 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3810 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3812 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3813 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3815 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3816 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3818 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3819 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3821 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3822 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3824 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3825 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3827 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3828 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3830 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3831 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3833 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3834 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3836 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3837 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3839 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3840 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3842 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3843 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3845 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3846 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3848 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3849 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3851 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3852 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3854 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3855 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3856 #endif
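/* Usage sketch (illustrative, not from the original header): the macro
   definitions above are the fallbacks used when __OPTIMIZE__ is not defined,
   since the builtins need the rounding/SAE operand to remain a compile-time
   constant.  Assuming AVX-512F is enabled, a typical call is

     __m512d r = _mm512_fnmsub_round_pd (a, b, c,
                                         _MM_FROUND_TO_NEAREST_INT
                                         | _MM_FROUND_NO_EXC);

   which computes -(a*b) - c in all eight lanes with round-to-nearest and
   floating-point exceptions suppressed.  */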
3858 extern __inline __m512i
3859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3860 _mm512_abs_epi64 (__m512i __A)
3862 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3863 (__v8di)
3864 _mm512_undefined_epi32 (),
3865 (__mmask8) -1);
3868 extern __inline __m512i
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3872 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3873 (__v8di) __W,
3874 (__mmask8) __U);
3877 extern __inline __m512i
3878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3879 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3881 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3882 (__v8di)
3883 _mm512_setzero_si512 (),
3884 (__mmask8) __U);
3887 extern __inline __m512i
3888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3889 _mm512_abs_epi32 (__m512i __A)
3891 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3892 (__v16si)
3893 _mm512_undefined_epi32 (),
3894 (__mmask16) -1);
3897 extern __inline __m512i
3898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3899 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3901 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3902 (__v16si) __W,
3903 (__mmask16) __U);
3906 extern __inline __m512i
3907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3908 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3910 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3911 (__v16si)
3912 _mm512_setzero_si512 (),
3913 (__mmask16) __U);
3916 extern __inline __m512
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm512_broadcastss_ps (__m128 __A)
3920 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3921 (__v16sf)
3922 _mm512_undefined_ps (),
3923 (__mmask16) -1);
3926 extern __inline __m512
3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3928 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3930 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3931 (__v16sf) __O, __M);
3934 extern __inline __m512
3935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3938 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3939 (__v16sf)
3940 _mm512_setzero_ps (),
3941 __M);
3944 extern __inline __m512d
3945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3946 _mm512_broadcastsd_pd (__m128d __A)
3948 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3949 (__v8df)
3950 _mm512_undefined_pd (),
3951 (__mmask8) -1);
3954 extern __inline __m512d
3955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3956 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3958 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3959 (__v8df) __O, __M);
3962 extern __inline __m512d
3963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3964 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3966 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3967 (__v8df)
3968 _mm512_setzero_pd (),
3969 __M);
3972 extern __inline __m512i
3973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3974 _mm512_broadcastd_epi32 (__m128i __A)
3976 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3977 (__v16si)
3978 _mm512_undefined_epi32 (),
3979 (__mmask16) -1);
3982 extern __inline __m512i
3983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3986 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3987 (__v16si) __O, __M);
3990 extern __inline __m512i
3991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3992 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3994 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3995 (__v16si)
3996 _mm512_setzero_si512 (),
3997 __M);
4000 extern __inline __m512i
4001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4002 _mm512_set1_epi32 (int __A)
4004 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4005 (__v16si)
4006 _mm512_undefined_epi32 (),
4007 (__mmask16)(-1));
4010 extern __inline __m512i
4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
4014 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
4015 __M);
4018 extern __inline __m512i
4019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4020 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
4022 return (__m512i)
4023 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
4024 (__v16si) _mm512_setzero_si512 (),
4025 __M);
4028 extern __inline __m512i
4029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4030 _mm512_broadcastq_epi64 (__m128i __A)
4032 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4033 (__v8di)
4034 _mm512_undefined_epi32 (),
4035 (__mmask8) -1);
4038 extern __inline __m512i
4039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
4042 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4043 (__v8di) __O, __M);
4046 extern __inline __m512i
4047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
4050 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
4051 (__v8di)
4052 _mm512_setzero_si512 (),
4053 __M);
4056 extern __inline __m512i
4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058 _mm512_set1_epi64 (long long __A)
4060 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4061 (__v8di)
4062 _mm512_undefined_epi32 (),
4063 (__mmask8)(-1));
4066 extern __inline __m512i
4067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
4070 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
4071 __M);
4074 extern __inline __m512i
4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
4078 return (__m512i)
4079 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
4080 (__v8di) _mm512_setzero_si512 (),
4081 __M);
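/* Usage sketch (illustrative): the masked set1 forms above broadcast a
   scalar only into the lanes selected by the mask, e.g.

     __m512i v = _mm512_maskz_set1_epi32 ((__mmask16) 0x00FF, 42);

   yields 42 in elements 0-7 and zero in elements 8-15.  */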
4084 extern __inline __m512
4085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086 _mm512_broadcast_f32x4 (__m128 __A)
4088 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4089 (__v16sf)
4090 _mm512_undefined_ps (),
4091 (__mmask16) -1);
4094 extern __inline __m512
4095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
4098 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4099 (__v16sf) __O,
4100 __M);
4103 extern __inline __m512
4104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4105 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
4107 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
4108 (__v16sf)
4109 _mm512_setzero_ps (),
4110 __M);
4113 extern __inline __m512i
4114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4115 _mm512_broadcast_i32x4 (__m128i __A)
4117 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4118 (__v16si)
4119 _mm512_undefined_epi32 (),
4120 (__mmask16) -1);
4123 extern __inline __m512i
4124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4125 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4127 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4128 (__v16si) __O,
4129 __M);
4132 extern __inline __m512i
4133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4136 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4137 (__v16si)
4138 _mm512_setzero_si512 (),
4139 __M);
4142 extern __inline __m512d
4143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4144 _mm512_broadcast_f64x4 (__m256d __A)
4146 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4147 (__v8df)
4148 _mm512_undefined_pd (),
4149 (__mmask8) -1);
4152 extern __inline __m512d
4153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4154 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4156 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4157 (__v8df) __O,
4158 __M);
4161 extern __inline __m512d
4162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4163 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4165 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4166 (__v8df)
4167 _mm512_setzero_pd (),
4168 __M);
4171 extern __inline __m512i
4172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173 _mm512_broadcast_i64x4 (__m256i __A)
4175 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4176 (__v8di)
4177 _mm512_undefined_epi32 (),
4178 (__mmask8) -1);
4181 extern __inline __m512i
4182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4183 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4185 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4186 (__v8di) __O,
4187 __M);
4190 extern __inline __m512i
4191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4194 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4195 (__v8di)
4196 _mm512_setzero_si512 (),
4197 __M);
4200 typedef enum
4202 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
4203 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
4204 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
4205 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
4206 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
4207 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
4208 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
4209 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
4210 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
4211 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
4212 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
4213 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
4214 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
4215 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
4216 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
4217 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
4218 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
4219 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
4220 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
4221 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
4222 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
4223 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
4224 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
4225 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
4226 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
4227 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
4228 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
4229 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
4230 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
4231 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
4232 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
4233 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
4234 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
4235 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
4236 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
4237 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
4238 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
4239 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
4240 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
4241 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
4242 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
4243 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
4244 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
4245 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
4246 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
4247 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
4248 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
4249 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
4250 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
4251 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
4252 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
4253 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
4254 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
4255 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
4256 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
4257 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
4258 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
4259 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
4260 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
4261 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
4262 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
4263 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
4264 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
4265 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
4266 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
4267 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
4268 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
4269 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
4270 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
4271 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
4272 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
4273 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
4274 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
4275 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
4276 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
4277 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
4278 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
4279 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
4280 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
4281 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
4282 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
4283 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
4284 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
4285 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
4286 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
4287 _MM_PERM_DDDD = 0xFF
4288 } _MM_PERM_ENUM;
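/* Usage sketch (illustrative): each _MM_PERM_XYZW constant packs four 2-bit
   selectors (A=0, B=1, C=2, D=3), the first letter choosing the source of
   the highest element and the last letter the source of the lowest.  Thus
   _MM_PERM_DCBA (0xE4) is the identity permutation for
   _mm512_shuffle_epi32, while _MM_PERM_AAAA (0x00) replicates element 0 of
   each 128-bit lane.  */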
4290 #ifdef __OPTIMIZE__
4291 extern __inline __m512i
4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4295 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4296 __mask,
4297 (__v16si)
4298 _mm512_undefined_epi32 (),
4299 (__mmask16) -1);
4302 extern __inline __m512i
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4305 _MM_PERM_ENUM __mask)
4307 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4308 __mask,
4309 (__v16si) __W,
4310 (__mmask16) __U);
4313 extern __inline __m512i
4314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4315 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4317 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4318 __mask,
4319 (__v16si)
4320 _mm512_setzero_si512 (),
4321 (__mmask16) __U);
4324 extern __inline __m512i
4325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4326 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4328 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4329 (__v8di) __B, __imm,
4330 (__v8di)
4331 _mm512_undefined_epi32 (),
4332 (__mmask8) -1);
4335 extern __inline __m512i
4336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4337 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4338 __m512i __B, const int __imm)
4340 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4341 (__v8di) __B, __imm,
4342 (__v8di) __W,
4343 (__mmask8) __U);
4346 extern __inline __m512i
4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4349 const int __imm)
4351 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4352 (__v8di) __B, __imm,
4353 (__v8di)
4354 _mm512_setzero_si512 (),
4355 (__mmask8) __U);
4358 extern __inline __m512i
4359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4360 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4362 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4363 (__v16si) __B,
4364 __imm,
4365 (__v16si)
4366 _mm512_undefined_epi32 (),
4367 (__mmask16) -1);
4370 extern __inline __m512i
4371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4372 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4373 __m512i __B, const int __imm)
4375 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4376 (__v16si) __B,
4377 __imm,
4378 (__v16si) __W,
4379 (__mmask16) __U);
4382 extern __inline __m512i
4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4385 const int __imm)
4387 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4388 (__v16si) __B,
4389 __imm,
4390 (__v16si)
4391 _mm512_setzero_si512 (),
4392 (__mmask16) __U);
4395 extern __inline __m512d
4396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4397 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4399 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4400 (__v8df) __B, __imm,
4401 (__v8df)
4402 _mm512_undefined_pd (),
4403 (__mmask8) -1);
4406 extern __inline __m512d
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4409 __m512d __B, const int __imm)
4411 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4412 (__v8df) __B, __imm,
4413 (__v8df) __W,
4414 (__mmask8) __U);
4417 extern __inline __m512d
4418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4420 const int __imm)
4422 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4423 (__v8df) __B, __imm,
4424 (__v8df)
4425 _mm512_setzero_pd (),
4426 (__mmask8) __U);
4429 extern __inline __m512
4430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4431 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4433 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4434 (__v16sf) __B, __imm,
4435 (__v16sf)
4436 _mm512_undefined_ps (),
4437 (__mmask16) -1);
4440 extern __inline __m512
4441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4442 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4443 __m512 __B, const int __imm)
4445 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4446 (__v16sf) __B, __imm,
4447 (__v16sf) __W,
4448 (__mmask16) __U);
4451 extern __inline __m512
4452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4453 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4454 const int __imm)
4456 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4457 (__v16sf) __B, __imm,
4458 (__v16sf)
4459 _mm512_setzero_ps (),
4460 (__mmask16) __U);
4463 #else
4464 #define _mm512_shuffle_epi32(X, C) \
4465 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4466 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4467 (__mmask16)-1))
4469 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4470 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4471 (__v16si)(__m512i)(W),\
4472 (__mmask16)(U)))
4474 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4475 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4476 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4477 (__mmask16)(U)))
4479 #define _mm512_shuffle_i64x2(X, Y, C) \
4480 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4481 (__v8di)(__m512i)(Y), (int)(C),\
4482 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
4483 (__mmask8)-1))
4485 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4486 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4487 (__v8di)(__m512i)(Y), (int)(C),\
4488 (__v8di)(__m512i)(W),\
4489 (__mmask8)(U)))
4491 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4492 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4493 (__v8di)(__m512i)(Y), (int)(C),\
4494 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4495 (__mmask8)(U)))
4497 #define _mm512_shuffle_i32x4(X, Y, C) \
4498 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4499 (__v16si)(__m512i)(Y), (int)(C),\
4500 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4501 (__mmask16)-1))
4503 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4504 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4505 (__v16si)(__m512i)(Y), (int)(C),\
4506 (__v16si)(__m512i)(W),\
4507 (__mmask16)(U)))
4509 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4510 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4511 (__v16si)(__m512i)(Y), (int)(C),\
4512 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4513 (__mmask16)(U)))
4515 #define _mm512_shuffle_f64x2(X, Y, C) \
4516 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4517 (__v8df)(__m512d)(Y), (int)(C),\
4518 (__v8df)(__m512d)_mm512_undefined_pd(),\
4519 (__mmask8)-1))
4521 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4522 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4523 (__v8df)(__m512d)(Y), (int)(C),\
4524 (__v8df)(__m512d)(W),\
4525 (__mmask8)(U)))
4527 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4528 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4529 (__v8df)(__m512d)(Y), (int)(C),\
4530 (__v8df)(__m512d)_mm512_setzero_pd(),\
4531 (__mmask8)(U)))
4533 #define _mm512_shuffle_f32x4(X, Y, C) \
4534 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4535 (__v16sf)(__m512)(Y), (int)(C),\
4536 (__v16sf)(__m512)_mm512_undefined_ps(),\
4537 (__mmask16)-1))
4539 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4540 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4541 (__v16sf)(__m512)(Y), (int)(C),\
4542 (__v16sf)(__m512)(W),\
4543 (__mmask16)(U)))
4545 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4546 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4547 (__v16sf)(__m512)(Y), (int)(C),\
4548 (__v16sf)(__m512)_mm512_setzero_ps(),\
4549 (__mmask16)(U)))
4550 #endif
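/* Usage sketch (illustrative): for the 128-bit-lane shuffles the immediate
   selects whole lanes rather than 32-bit elements, so, assuming AVX-512F is
   enabled,

     __m512i lo = _mm512_shuffle_i32x4 (x, x, _MM_PERM_AAAA);

   broadcasts lane 0 of x to all four 128-bit lanes.  */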
4552 extern __inline __m512i
4553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4554 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4556 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4557 (__v16si) __B,
4558 (__v16si)
4559 _mm512_undefined_epi32 (),
4560 (__mmask16) -1);
4563 extern __inline __m512i
4564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4565 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4567 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4568 (__v16si) __B,
4569 (__v16si) __W,
4570 (__mmask16) __U);
4573 extern __inline __m512i
4574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4577 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4578 (__v16si) __B,
4579 (__v16si)
4580 _mm512_setzero_si512 (),
4581 (__mmask16) __U);
4584 extern __inline __m512i
4585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4586 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4588 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4589 (__v16si) __B,
4590 (__v16si)
4591 _mm512_undefined_epi32 (),
4592 (__mmask16) -1);
4595 extern __inline __m512i
4596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4597 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4599 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4600 (__v16si) __B,
4601 (__v16si) __W,
4602 (__mmask16) __U);
4605 extern __inline __m512i
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4609 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4610 (__v16si) __B,
4611 (__v16si)
4612 _mm512_setzero_si512 (),
4613 (__mmask16) __U);
4616 extern __inline __m512i
4617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4620 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4621 (__v8di) __B,
4622 (__v8di)
4623 _mm512_undefined_epi32 (),
4624 (__mmask8) -1);
4627 extern __inline __m512i
4628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4629 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4631 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4632 (__v8di) __B,
4633 (__v8di) __W,
4634 (__mmask8) __U);
4637 extern __inline __m512i
4638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4639 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4641 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4642 (__v8di) __B,
4643 (__v8di)
4644 _mm512_setzero_si512 (),
4645 (__mmask8) __U);
4648 extern __inline __m512i
4649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4650 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4652 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4653 (__v8di) __B,
4654 (__v8di)
4655 _mm512_undefined_epi32 (),
4656 (__mmask8) -1);
4659 extern __inline __m512i
4660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4661 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4663 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4664 (__v8di) __B,
4665 (__v8di) __W,
4666 (__mmask8) __U);
4669 extern __inline __m512i
4670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4671 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4673 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4674 (__v8di) __B,
4675 (__v8di)
4676 _mm512_setzero_si512 (),
4677 (__mmask8) __U);
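/* Usage sketch (illustrative): the rolv/rorv intrinsics above rotate each
   element of __A left or right by the count held in the corresponding
   element of __B, e.g.

     __m512i r = _mm512_rolv_epi32 (x, _mm512_set1_epi32 (8));

   rotates every 32-bit element of x left by 8 bits.  */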
4680 #ifdef __OPTIMIZE__
4681 extern __inline __m256i
4682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4683 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4685 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4686 (__v8si)
4687 _mm256_undefined_si256 (),
4688 (__mmask8) -1, __R);
4691 extern __inline __m256i
4692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4693 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4694 const int __R)
4696 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4697 (__v8si) __W,
4698 (__mmask8) __U, __R);
4701 extern __inline __m256i
4702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4703 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4705 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4706 (__v8si)
4707 _mm256_setzero_si256 (),
4708 (__mmask8) __U, __R);
4711 extern __inline __m256i
4712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4713 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4715 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4716 (__v8si)
4717 _mm256_undefined_si256 (),
4718 (__mmask8) -1, __R);
4721 extern __inline __m256i
4722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4723 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4724 const int __R)
4726 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4727 (__v8si) __W,
4728 (__mmask8) __U, __R);
4731 extern __inline __m256i
4732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4733 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4735 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4736 (__v8si)
4737 _mm256_setzero_si256 (),
4738 (__mmask8) __U, __R);
4740 #else
4741 #define _mm512_cvtt_roundpd_epi32(A, B) \
4742 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4744 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4745 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4747 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4748 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4750 #define _mm512_cvtt_roundpd_epu32(A, B) \
4751 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4753 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4754 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4756 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4757 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4758 #endif
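/* Usage sketch (illustrative): the cvtt_round* forms always truncate, so the
   last argument only controls exception suppression; _MM_FROUND_NO_EXC or
   _MM_FROUND_CUR_DIRECTION are the usual choices, e.g.

     __m256i i = _mm512_cvtt_roundpd_epi32 (v, _MM_FROUND_NO_EXC);  */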
4760 #ifdef __OPTIMIZE__
4761 extern __inline __m256i
4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4765 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4766 (__v8si)
4767 _mm256_undefined_si256 (),
4768 (__mmask8) -1, __R);
4771 extern __inline __m256i
4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4774 const int __R)
4776 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4777 (__v8si) __W,
4778 (__mmask8) __U, __R);
4781 extern __inline __m256i
4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4785 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4786 (__v8si)
4787 _mm256_setzero_si256 (),
4788 (__mmask8) __U, __R);
4791 extern __inline __m256i
4792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4793 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4795 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4796 (__v8si)
4797 _mm256_undefined_si256 (),
4798 (__mmask8) -1, __R);
4801 extern __inline __m256i
4802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4803 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4804 const int __R)
4806 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4807 (__v8si) __W,
4808 (__mmask8) __U, __R);
4811 extern __inline __m256i
4812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4813 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4815 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4816 (__v8si)
4817 _mm256_setzero_si256 (),
4818 (__mmask8) __U, __R);
4820 #else
4821 #define _mm512_cvt_roundpd_epi32(A, B) \
4822 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4824 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4825 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4827 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4828 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4830 #define _mm512_cvt_roundpd_epu32(A, B) \
4831 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4833 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4834 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4836 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4837 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4838 #endif
4840 #ifdef __OPTIMIZE__
4841 extern __inline __m512i
4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4845 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4846 (__v16si)
4847 _mm512_undefined_epi32 (),
4848 (__mmask16) -1, __R);
4851 extern __inline __m512i
4852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4853 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4854 const int __R)
4856 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4857 (__v16si) __W,
4858 (__mmask16) __U, __R);
4861 extern __inline __m512i
4862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4863 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4865 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4866 (__v16si)
4867 _mm512_setzero_si512 (),
4868 (__mmask16) __U, __R);
4871 extern __inline __m512i
4872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4873 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4875 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4876 (__v16si)
4877 _mm512_undefined_epi32 (),
4878 (__mmask16) -1, __R);
4881 extern __inline __m512i
4882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4883 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4884 const int __R)
4886 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4887 (__v16si) __W,
4888 (__mmask16) __U, __R);
4891 extern __inline __m512i
4892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4893 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4895 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4896 (__v16si)
4897 _mm512_setzero_si512 (),
4898 (__mmask16) __U, __R);
4900 #else
4901 #define _mm512_cvtt_roundps_epi32(A, B) \
4902 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4904 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4905 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4907 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4908 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4910 #define _mm512_cvtt_roundps_epu32(A, B) \
4911 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4913 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4914 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4916 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4917 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4918 #endif
4920 #ifdef __OPTIMIZE__
4921 extern __inline __m512i
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4925 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4926 (__v16si)
4927 _mm512_undefined_epi32 (),
4928 (__mmask16) -1, __R);
4931 extern __inline __m512i
4932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4933 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4934 const int __R)
4936 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4937 (__v16si) __W,
4938 (__mmask16) __U, __R);
4941 extern __inline __m512i
4942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4943 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4945 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4946 (__v16si)
4947 _mm512_setzero_si512 (),
4948 (__mmask16) __U, __R);
4951 extern __inline __m512i
4952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4953 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4955 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4956 (__v16si)
4957 _mm512_undefined_epi32 (),
4958 (__mmask16) -1, __R);
4961 extern __inline __m512i
4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4964 const int __R)
4966 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4967 (__v16si) __W,
4968 (__mmask16) __U, __R);
4971 extern __inline __m512i
4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4975 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4976 (__v16si)
4977 _mm512_setzero_si512 (),
4978 (__mmask16) __U, __R);
4980 #else
4981 #define _mm512_cvt_roundps_epi32(A, B) \
4982 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4984 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4985 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4987 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4988 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4990 #define _mm512_cvt_roundps_epu32(A, B) \
4991 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4993 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4994 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4996 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4997 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4998 #endif
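/* Usage sketch (illustrative): unlike the truncating forms, the cvt_round*
   conversions honour the embedded rounding mode, e.g.

     __m512i n = _mm512_cvt_roundps_epi32 (v, _MM_FROUND_TO_NEG_INF
                                              | _MM_FROUND_NO_EXC);

   rounds each element toward negative infinity before converting.  */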
5000 extern __inline __m128d
5001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5002 _mm_cvtu32_sd (__m128d __A, unsigned __B)
5004 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
5007 #ifdef __x86_64__
5008 #ifdef __OPTIMIZE__
5009 extern __inline __m128d
5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
5013 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
5016 extern __inline __m128d
5017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5018 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
5020 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5023 extern __inline __m128d
5024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
5027 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
5029 #else
5030 #define _mm_cvt_roundu64_sd(A, B, C) \
5031 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
5033 #define _mm_cvt_roundi64_sd(A, B, C) \
5034 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
5036 #define _mm_cvt_roundsi64_sd(A, B, C) \
5037 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
5038 #endif
5040 #endif
5042 #ifdef __OPTIMIZE__
5043 extern __inline __m128
5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
5047 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
5050 extern __inline __m128
5051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5052 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
5054 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5057 extern __inline __m128
5058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5059 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
5061 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
5063 #else
5064 #define _mm_cvt_roundu32_ss(A, B, C) \
5065 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
5067 #define _mm_cvt_roundi32_ss(A, B, C) \
5068 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
5070 #define _mm_cvt_roundsi32_ss(A, B, C) \
5071 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
5072 #endif
5074 #ifdef __x86_64__
5075 #ifdef __OPTIMIZE__
5076 extern __inline __m128
5077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5078 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
5080 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
5083 extern __inline __m128
5084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5085 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
5087 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5090 extern __inline __m128
5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
5094 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
5096 #else
5097 #define _mm_cvt_roundu64_ss(A, B, C) \
5098 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
5100 #define _mm_cvt_roundi64_ss(A, B, C) \
5101 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
5103 #define _mm_cvt_roundsi64_ss(A, B, C) \
5104 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
5105 #endif
5107 #endif
5109 extern __inline __m128i
5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111 _mm512_cvtepi32_epi8 (__m512i __A)
5113 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5114 (__v16qi)
5115 _mm_undefined_si128 (),
5116 (__mmask16) -1);
5119 extern __inline void
5120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5123 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5126 extern __inline __m128i
5127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5128 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5130 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5131 (__v16qi) __O, __M);
5134 extern __inline __m128i
5135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5136 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5138 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5139 (__v16qi)
5140 _mm_setzero_si128 (),
5141 __M);
5144 extern __inline __m128i
5145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5146 _mm512_cvtsepi32_epi8 (__m512i __A)
5148 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5149 (__v16qi)
5150 _mm_undefined_si128 (),
5151 (__mmask16) -1);
5154 extern __inline void
5155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5156 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5158 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5161 extern __inline __m128i
5162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5163 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5165 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5166 (__v16qi) __O, __M);
5169 extern __inline __m128i
5170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5171 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5173 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5174 (__v16qi)
5175 _mm_setzero_si128 (),
5176 __M);
5179 extern __inline __m128i
5180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5181 _mm512_cvtusepi32_epi8 (__m512i __A)
5183 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5184 (__v16qi)
5185 _mm_undefined_si128 (),
5186 (__mmask16) -1);
5189 extern __inline void
5190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5191 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5193 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5196 extern __inline __m128i
5197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5200 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5201 (__v16qi) __O,
5202 __M);
5205 extern __inline __m128i
5206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5207 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5209 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5210 (__v16qi)
5211 _mm_setzero_si128 (),
5212 __M);
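/* Usage sketch (illustrative): the three 32->8 down-conversion families
   above differ only in how out-of-range values are handled:
   _mm512_cvtepi32_epi8 truncates each element to its low 8 bits,
   _mm512_cvtsepi32_epi8 saturates to the signed range [-128, 127], and
   _mm512_cvtusepi32_epi8 saturates to the unsigned range [0, 255].  A source
   element of 300 therefore becomes 44, 127 and 255 respectively.  */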
5215 extern __inline __m256i
5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217 _mm512_cvtepi32_epi16 (__m512i __A)
5219 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5220 (__v16hi)
5221 _mm256_undefined_si256 (),
5222 (__mmask16) -1);
5225 extern __inline void
5226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5227 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5229 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5232 extern __inline __m256i
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5236 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5237 (__v16hi) __O, __M);
5240 extern __inline __m256i
5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5242 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5244 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5245 (__v16hi)
5246 _mm256_setzero_si256 (),
5247 __M);
5250 extern __inline __m256i
5251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252 _mm512_cvtsepi32_epi16 (__m512i __A)
5254 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5255 (__v16hi)
5256 _mm256_undefined_si256 (),
5257 (__mmask16) -1);
5260 extern __inline void
5261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5262 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5264 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5267 extern __inline __m256i
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5271 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5272 (__v16hi) __O, __M);
5275 extern __inline __m256i
5276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5277 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5279 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5280 (__v16hi)
5281 _mm256_setzero_si256 (),
5282 __M);
5285 extern __inline __m256i
5286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5287 _mm512_cvtusepi32_epi16 (__m512i __A)
5289 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5290 (__v16hi)
5291 _mm256_undefined_si256 (),
5292 (__mmask16) -1);
5295 extern __inline void
5296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5297 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5299 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5302 extern __inline __m256i
5303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5304 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5306 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5307 (__v16hi) __O,
5308 __M);
5311 extern __inline __m256i
5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5315 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5316 (__v16hi)
5317 _mm256_setzero_si256 (),
5318 __M);
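/* A minimal usage sketch for the 32-bit down-converting intrinsics above,
   assuming the statements sit inside a function in a translation unit that
   includes <immintrin.h> and is built with -mavx512f; the variable names are
   illustrative only.

     __m512i v     = _mm512_set1_epi32 (70000);
     __m256i words = _mm512_cvtusepi32_epi16 (v);  // unsigned saturation: every lane becomes 65535
     __m128i bytes = _mm512_cvtusepi32_epi8 (v);   // unsigned saturation: every lane becomes 255
     unsigned short out[16];
     _mm512_mask_cvtusepi32_storeu_epi16 (out, (__mmask16) 0x00FF, v);  // stores only out[0..7]
*/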
5321 extern __inline __m256i
5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323 _mm512_cvtepi64_epi32 (__m512i __A)
5325 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5326 (__v8si)
5327 _mm256_undefined_si256 (),
5328 (__mmask8) -1);
5331 extern __inline void
5332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5333 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5335 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5338 extern __inline __m256i
5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5342 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5343 (__v8si) __O, __M);
5346 extern __inline __m256i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5350 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5351 (__v8si)
5352 _mm256_setzero_si256 (),
5353 __M);
5356 extern __inline __m256i
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_cvtsepi64_epi32 (__m512i __A)
5360 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5361 (__v8si)
5362 _mm256_undefined_si256 (),
5363 (__mmask8) -1);
5366 extern __inline void
5367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5370 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5373 extern __inline __m256i
5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5375 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5377 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5378 (__v8si) __O, __M);
5381 extern __inline __m256i
5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5385 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5386 (__v8si)
5387 _mm256_setzero_si256 (),
5388 __M);
5391 extern __inline __m256i
5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393 _mm512_cvtusepi64_epi32 (__m512i __A)
5395 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5396 (__v8si)
5397 _mm256_undefined_si256 (),
5398 (__mmask8) -1);
5401 extern __inline void
5402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5405 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5408 extern __inline __m256i
5409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5410 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5412 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5413 (__v8si) __O, __M);
5416 extern __inline __m256i
5417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5420 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5421 (__v8si)
5422 _mm256_setzero_si256 (),
5423 __M);
5426 extern __inline __m128i
5427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5428 _mm512_cvtepi64_epi16 (__m512i __A)
5430 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5431 (__v8hi)
5432 _mm_undefined_si128 (),
5433 (__mmask8) -1);
5436 extern __inline void
5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5438 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5440 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5443 extern __inline __m128i
5444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5445 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5447 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5448 (__v8hi) __O, __M);
5451 extern __inline __m128i
5452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5455 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5456 (__v8hi)
5457 _mm_setzero_si128 (),
5458 __M);
5461 extern __inline __m128i
5462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5463 _mm512_cvtsepi64_epi16 (__m512i __A)
5465 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5466 (__v8hi)
5467 _mm_undefined_si128 (),
5468 (__mmask8) -1);
5471 extern __inline void
5472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5473 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5475 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5478 extern __inline __m128i
5479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5480 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5482 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5483 (__v8hi) __O, __M);
5486 extern __inline __m128i
5487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5490 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5491 (__v8hi)
5492 _mm_setzero_si128 (),
5493 __M);
5496 extern __inline __m128i
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm512_cvtusepi64_epi16 (__m512i __A)
5500 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5501 (__v8hi)
5502 _mm_undefined_si128 (),
5503 (__mmask8) -1);
5506 extern __inline void
5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5510 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5513 extern __inline __m128i
5514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5515 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5517 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5518 (__v8hi) __O, __M);
5521 extern __inline __m128i
5522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5523 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5525 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5526 (__v8hi)
5527 _mm_setzero_si128 (),
5528 __M);
5531 extern __inline __m128i
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 _mm512_cvtepi64_epi8 (__m512i __A)
5535 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5536 (__v16qi)
5537 _mm_undefined_si128 (),
5538 (__mmask8) -1);
5541 extern __inline void
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5545 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5548 extern __inline __m128i
5549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5550 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5552 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5553 (__v16qi) __O, __M);
5556 extern __inline __m128i
5557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5558 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5560 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5561 (__v16qi)
5562 _mm_setzero_si128 (),
5563 __M);
5566 extern __inline __m128i
5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568 _mm512_cvtsepi64_epi8 (__m512i __A)
5570 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5571 (__v16qi)
5572 _mm_undefined_si128 (),
5573 (__mmask8) -1);
5576 extern __inline void
5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5578 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5580 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5583 extern __inline __m128i
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5587 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5588 (__v16qi) __O, __M);
5591 extern __inline __m128i
5592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5593 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5595 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5596 (__v16qi)
5597 _mm_setzero_si128 (),
5598 __M);
5601 extern __inline __m128i
5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 _mm512_cvtusepi64_epi8 (__m512i __A)
5605 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5606 (__v16qi)
5607 _mm_undefined_si128 (),
5608 (__mmask8) -1);
5611 extern __inline void
5612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5613 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5615 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5618 extern __inline __m128i
5619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5620 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5622 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5623 (__v16qi) __O,
5624 __M);
5627 extern __inline __m128i
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5631 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5632 (__v16qi)
5633 _mm_setzero_si128 (),
5634 __M);
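/* A minimal sketch contrasting truncation with signed and unsigned saturation
   for the quadword down-conversions above, under the same assumptions as the
   earlier sketch (<immintrin.h> included, -mavx512f); names are illustrative.

     __m512i q = _mm512_set1_epi64 (1000000LL);
     __m256i d = _mm512_cvtepi64_epi32 (q);    // truncation keeps the low 32 bits of each lane
     __m128i w = _mm512_cvtsepi64_epi16 (q);   // signed saturation clamps each lane to 32767
     __m128i b = _mm512_cvtusepi64_epi8 (q);   // unsigned saturation clamps each lane to 255
*/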
5637 extern __inline __m512d
5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639 _mm512_cvtepi32_pd (__m256i __A)
5641 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5642 (__v8df)
5643 _mm512_undefined_pd (),
5644 (__mmask8) -1);
5647 extern __inline __m512d
5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5649 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5651 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5652 (__v8df) __W,
5653 (__mmask8) __U);
5656 extern __inline __m512d
5657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5658 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5660 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5661 (__v8df)
5662 _mm512_setzero_pd (),
5663 (__mmask8) __U);
5666 extern __inline __m512d
5667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5668 _mm512_cvtepu32_pd (__m256i __A)
5670 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5671 (__v8df)
5672 _mm512_undefined_pd (),
5673 (__mmask8) -1);
5676 extern __inline __m512d
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5680 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5681 (__v8df) __W,
5682 (__mmask8) __U);
5685 extern __inline __m512d
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5689 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5690 (__v8df)
5691 _mm512_setzero_pd (),
5692 (__mmask8) __U);
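/* A short sketch of the signed versus unsigned int-to-double widening above,
   under the same assumptions as the earlier sketches; the bit pattern
   0xFFFFFFFF converts differently depending on which view is chosen.

     __m256i bits = _mm256_set1_epi32 (-1);
     __m512d s    = _mm512_cvtepi32_pd (bits);   // signed view:   every lane becomes -1.0
     __m512d u    = _mm512_cvtepu32_pd (bits);   // unsigned view: every lane becomes 4294967295.0
*/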
5695 #ifdef __OPTIMIZE__
5696 extern __inline __m512
5697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5698 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5700 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5701 (__v16sf)
5702 _mm512_undefined_ps (),
5703 (__mmask16) -1, __R);
5706 extern __inline __m512
5707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5708 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5709 const int __R)
5711 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5712 (__v16sf) __W,
5713 (__mmask16) __U, __R);
5716 extern __inline __m512
5717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5718 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5720 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5721 (__v16sf)
5722 _mm512_setzero_ps (),
5723 (__mmask16) __U, __R);
5726 extern __inline __m512
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5730 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5731 (__v16sf)
5732 _mm512_undefined_ps (),
5733 (__mmask16) -1, __R);
5736 extern __inline __m512
5737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5738 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5739 const int __R)
5741 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5742 (__v16sf) __W,
5743 (__mmask16) __U, __R);
5746 extern __inline __m512
5747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5748 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5750 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5751 (__v16sf)
5752 _mm512_setzero_ps (),
5753 (__mmask16) __U, __R);
5756 #else
5757 #define _mm512_cvt_roundepi32_ps(A, B) \
5758 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5760 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5761 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
5763 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5764 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5766 #define _mm512_cvt_roundepu32_ps(A, B) \
5767 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5769 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5770 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
5772 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5773 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5774 #endif
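/* A minimal sketch of the explicit rounding-control conversions above; the
   rounding argument must be a compile-time constant combining a rounding mode
   with _MM_FROUND_NO_EXC (or _MM_FROUND_CUR_DIRECTION).  Same assumptions as
   the earlier sketches, illustrative names.

     __m512i v = _mm512_set1_epi32 (3);
     __m512  f = _mm512_cvt_roundepi32_ps (v, _MM_FROUND_TO_NEAREST_INT
                                              | _MM_FROUND_NO_EXC);
     __m512  z = _mm512_maskz_cvt_roundepu32_ps ((__mmask16) 0x00FF, v,
                                                 _MM_FROUND_TO_NEAREST_INT
                                                 | _MM_FROUND_NO_EXC);  // upper 8 lanes zeroed
*/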
5776 #ifdef __OPTIMIZE__
5777 extern __inline __m256d
5778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5779 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5781 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5782 __imm,
5783 (__v4df)
5784 _mm256_undefined_pd (),
5785 (__mmask8) -1);
5788 extern __inline __m256d
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5791 const int __imm)
5793 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5794 __imm,
5795 (__v4df) __W,
5796 (__mmask8) __U);
5799 extern __inline __m256d
5800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5801 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5803 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5804 __imm,
5805 (__v4df)
5806 _mm256_setzero_pd (),
5807 (__mmask8) __U);
5810 extern __inline __m128
5811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5812 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5814 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5815 __imm,
5816 (__v4sf)
5817 _mm_undefined_ps (),
5818 (__mmask8) -1);
5821 extern __inline __m128
5822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5823 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5824 const int __imm)
5826 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5827 __imm,
5828 (__v4sf) __W,
5829 (__mmask8) __U);
5832 extern __inline __m128
5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5836 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5837 __imm,
5838 (__v4sf)
5839 _mm_setzero_ps (),
5840 (__mmask8) __U);
5843 extern __inline __m256i
5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5847 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5848 __imm,
5849 (__v4di)
5850 _mm256_undefined_si256 (),
5851 (__mmask8) -1);
5854 extern __inline __m256i
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5857 const int __imm)
5859 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5860 __imm,
5861 (__v4di) __W,
5862 (__mmask8) __U);
5865 extern __inline __m256i
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5869 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5870 __imm,
5871 (__v4di)
5872 _mm256_setzero_si256 (),
5873 (__mmask8) __U);
5876 extern __inline __m128i
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5880 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5881 __imm,
5882 (__v4si)
5883 _mm_undefined_si128 (),
5884 (__mmask8) -1);
5887 extern __inline __m128i
5888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5889 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5890 const int __imm)
5892 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5893 __imm,
5894 (__v4si) __W,
5895 (__mmask8) __U);
5898 extern __inline __m128i
5899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5900 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5902 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5903 __imm,
5904 (__v4si)
5905 _mm_setzero_si128 (),
5906 (__mmask8) __U);
5908 #else
5910 #define _mm512_extractf64x4_pd(X, C) \
5911 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5912 (int) (C),\
5913 (__v4df)(__m256d)_mm256_undefined_pd(),\
5914 (__mmask8)-1))
5916 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5917 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5918 (int) (C),\
5919 (__v4df)(__m256d)(W),\
5920 (__mmask8)(U)))
5922 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5923 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5924 (int) (C),\
5925 (__v4df)(__m256d)_mm256_setzero_pd(),\
5926 (__mmask8)(U)))
5928 #define _mm512_extractf32x4_ps(X, C) \
5929 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5930 (int) (C),\
5931 (__v4sf)(__m128)_mm_undefined_ps(),\
5932 (__mmask8)-1))
5934 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5935 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5936 (int) (C),\
5937 (__v4sf)(__m128)(W),\
5938 (__mmask8)(U)))
5940 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5941 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5942 (int) (C),\
5943 (__v4sf)(__m128)_mm_setzero_ps(),\
5944 (__mmask8)(U)))
5946 #define _mm512_extracti64x4_epi64(X, C) \
5947 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5948 (int) (C),\
5949 (__v4di)(__m256i)_mm256_undefined_si256 (),\
5950 (__mmask8)-1))
5952 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5953 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5954 (int) (C),\
5955 (__v4di)(__m256i)(W),\
5956 (__mmask8)(U)))
5958 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5959 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5960 (int) (C),\
5961 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5962 (__mmask8)(U)))
5964 #define _mm512_extracti32x4_epi32(X, C) \
5965 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5966 (int) (C),\
5967 (__v4si)(__m128i)_mm_undefined_si128 (),\
5968 (__mmask8)-1))
5970 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5971 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5972 (int) (C),\
5973 (__v4si)(__m128i)(W),\
5974 (__mmask8)(U)))
5976 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5977 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5978 (int) (C),\
5979 (__v4si)(__m128i)_mm_setzero_si128 (),\
5980 (__mmask8)(U)))
5981 #endif
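/* A minimal sketch for the 256-bit and 128-bit extraction intrinsics above;
   the lane selector must be an integer constant.  Same assumptions as the
   earlier sketches, illustrative names.

     __m512d v    = _mm512_set1_pd (2.0);
     __m256d hi   = _mm512_extractf64x4_pd (v, 1);   // upper four doubles
     __m128i lane = _mm512_extracti32x4_epi32 (_mm512_set1_epi32 (7), 3);   // highest 128-bit lane
*/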
5983 #ifdef __OPTIMIZE__
5984 extern __inline __m512i
5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5988 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5989 (__v4si) __B,
5990 __imm,
5991 (__v16si) __A, -1);
5994 extern __inline __m512
5995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5998 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5999 (__v4sf) __B,
6000 __imm,
6001 (__v16sf) __A, -1);
6004 extern __inline __m512i
6005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6006 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
6008 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6009 (__v4di) __B,
6010 __imm,
6011 (__v8di)
6012 _mm512_undefined_epi32 (),
6013 (__mmask8) -1);
6016 extern __inline __m512i
6017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6018 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
6019 __m256i __B, const int __imm)
6021 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6022 (__v4di) __B,
6023 __imm,
6024 (__v8di) __W,
6025 (__mmask8) __U);
6028 extern __inline __m512i
6029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6030 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
6031 const int __imm)
6033 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
6034 (__v4di) __B,
6035 __imm,
6036 (__v8di)
6037 _mm512_setzero_si512 (),
6038 (__mmask8) __U);
6041 extern __inline __m512d
6042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6043 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
6045 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6046 (__v4df) __B,
6047 __imm,
6048 (__v8df)
6049 _mm512_undefined_pd (),
6050 (__mmask8) -1);
6053 extern __inline __m512d
6054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6055 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
6056 __m256d __B, const int __imm)
6058 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6059 (__v4df) __B,
6060 __imm,
6061 (__v8df) __W,
6062 (__mmask8) __U);
6065 extern __inline __m512d
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
6068 const int __imm)
6070 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
6071 (__v4df) __B,
6072 __imm,
6073 (__v8df)
6074 _mm512_setzero_pd (),
6075 (__mmask8) __U);
6077 #else
6078 #define _mm512_insertf32x4(X, Y, C) \
6079 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
6080 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
6082 #define _mm512_inserti32x4(X, Y, C) \
6083 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
6084 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
6086 #define _mm512_insertf64x4(X, Y, C) \
6087 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
6088 (__v4df)(__m256d) (Y), (int) (C), \
6089 (__v8df)(__m512d)_mm512_undefined_pd(), \
6090 (__mmask8)-1))
6092 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
6093 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
6094 (__v4df)(__m256d) (Y), (int) (C), \
6095 (__v8df)(__m512d)(W), \
6096 (__mmask8)(U)))
6098 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
6099 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
6100 (__v4df)(__m256d) (Y), (int) (C), \
6101 (__v8df)(__m512d)_mm512_setzero_pd(), \
6102 (__mmask8)(U)))
6104 #define _mm512_inserti64x4(X, Y, C) \
6105 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6106 (__v4di)(__m256i) (Y), (int) (C), \
6107 (__v8di)(__m512i)_mm512_undefined_epi32 (), \
6108 (__mmask8)-1))
6110 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
6111 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6112 (__v4di)(__m256i) (Y), (int) (C),\
6113 (__v8di)(__m512i)(W),\
6114 (__mmask8)(U)))
6116 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
6117 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
6118 (__v4di)(__m256i) (Y), (int) (C), \
6119 (__v8di)(__m512i)_mm512_setzero_si512 (), \
6120 (__mmask8)(U)))
6121 #endif
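/* A minimal sketch for the insertion intrinsics above, which replace one
   256-bit or 128-bit lane of a 512-bit value; the lane selector is again a
   constant.  Same assumptions, illustrative names.

     __m512d dst = _mm512_setzero_pd ();
     __m256d src = _mm256_set1_pd (1.5);
     dst = _mm512_insertf64x4 (dst, src, 1);   // upper half becomes 1.5, lower half stays 0.0
*/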
6123 extern __inline __m512d
6124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125 _mm512_loadu_pd (void const *__P)
6127 return *(__m512d_u *)__P;
6130 extern __inline __m512d
6131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6132 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6134 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6135 (__v8df) __W,
6136 (__mmask8) __U);
6139 extern __inline __m512d
6140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6141 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6143 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6144 (__v8df)
6145 _mm512_setzero_pd (),
6146 (__mmask8) __U);
6149 extern __inline void
6150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6151 _mm512_storeu_pd (void *__P, __m512d __A)
6153 *(__m512d_u *)__P = __A;
6156 extern __inline void
6157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6160 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
6161 (__mmask8) __U);
6164 extern __inline __m512
6165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6166 _mm512_loadu_ps (void const *__P)
6168 return *(__m512_u *)__P;
6171 extern __inline __m512
6172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6173 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6175 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6176 (__v16sf) __W,
6177 (__mmask16) __U);
6180 extern __inline __m512
6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6184 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6185 (__v16sf)
6186 _mm512_setzero_ps (),
6187 (__mmask16) __U);
6190 extern __inline void
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm512_storeu_ps (void *__P, __m512 __A)
6194 *(__m512_u *)__P = __A;
6197 extern __inline void
6198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6199 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6201 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
6202 (__mmask16) __U);
6205 extern __inline __m512i
6206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6207 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6209 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6210 (__v8di) __W,
6211 (__mmask8) __U);
6214 extern __inline __m512i
6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6218 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6219 (__v8di)
6220 _mm512_setzero_si512 (),
6221 (__mmask8) __U);
6224 extern __inline void
6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6226 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6228 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
6229 (__mmask8) __U);
6232 extern __inline __m512i
6233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6234 _mm512_loadu_si512 (void const *__P)
6236 return *(__m512i_u *)__P;
6239 extern __inline __m512i
6240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6241 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6243 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6244 (__v16si) __W,
6245 (__mmask16) __U);
6248 extern __inline __m512i
6249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6252 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6253 (__v16si)
6254 _mm512_setzero_si512 (),
6255 (__mmask16) __U);
6258 extern __inline void
6259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6260 _mm512_storeu_si512 (void *__P, __m512i __A)
6262 *(__m512i_u *)__P = __A;
6265 extern __inline void
6266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6267 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6269 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
6270 (__mmask16) __U);
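/* A minimal sketch of the unaligned load/store intrinsics above; the masked
   store writes only the lanes whose mask bit is set and leaves the rest of
   the buffer untouched.  Same assumptions, illustrative names.

     double buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
     __m512d v = _mm512_loadu_pd (buf);                 // no alignment requirement on buf
     _mm512_mask_storeu_pd (buf, (__mmask8) 0x0F, v);   // writes buf[0..3] only
*/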
6273 extern __inline __m512d
6274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6275 _mm512_permutevar_pd (__m512d __A, __m512i __C)
6277 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6278 (__v8di) __C,
6279 (__v8df)
6280 _mm512_undefined_pd (),
6281 (__mmask8) -1);
6284 extern __inline __m512d
6285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6286 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6288 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6289 (__v8di) __C,
6290 (__v8df) __W,
6291 (__mmask8) __U);
6294 extern __inline __m512d
6295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6296 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6298 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6299 (__v8di) __C,
6300 (__v8df)
6301 _mm512_setzero_pd (),
6302 (__mmask8) __U);
6305 extern __inline __m512
6306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6307 _mm512_permutevar_ps (__m512 __A, __m512i __C)
6309 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6310 (__v16si) __C,
6311 (__v16sf)
6312 _mm512_undefined_ps (),
6313 (__mmask16) -1);
6316 extern __inline __m512
6317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6318 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6320 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6321 (__v16si) __C,
6322 (__v16sf) __W,
6323 (__mmask16) __U);
6326 extern __inline __m512
6327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6330 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6331 (__v16si) __C,
6332 (__v16sf)
6333 _mm512_setzero_ps (),
6334 (__mmask16) __U);
6337 extern __inline __m512i
6338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6339 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6341 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6342 /* idx */ ,
6343 (__v8di) __A,
6344 (__v8di) __B,
6345 (__mmask8) -1);
6348 extern __inline __m512i
6349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6350 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6351 __m512i __B)
6353 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6354 /* idx */ ,
6355 (__v8di) __A,
6356 (__v8di) __B,
6357 (__mmask8) __U);
6360 extern __inline __m512i
6361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6363 __mmask8 __U, __m512i __B)
6365 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6366 (__v8di) __I
6367 /* idx */ ,
6368 (__v8di) __B,
6369 (__mmask8) __U);
6372 extern __inline __m512i
6373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6374 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6375 __m512i __I, __m512i __B)
6377 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6378 /* idx */ ,
6379 (__v8di) __A,
6380 (__v8di) __B,
6381 (__mmask8) __U);
6384 extern __inline __m512i
6385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6388 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6389 /* idx */ ,
6390 (__v16si) __A,
6391 (__v16si) __B,
6392 (__mmask16) -1);
6395 extern __inline __m512i
6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6397 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6398 __m512i __I, __m512i __B)
6400 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6401 /* idx */ ,
6402 (__v16si) __A,
6403 (__v16si) __B,
6404 (__mmask16) __U);
6407 extern __inline __m512i
6408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6409 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6410 __mmask16 __U, __m512i __B)
6412 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6413 (__v16si) __I
6414 /* idx */ ,
6415 (__v16si) __B,
6416 (__mmask16) __U);
6419 extern __inline __m512i
6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6421 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6422 __m512i __I, __m512i __B)
6424 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6425 /* idx */ ,
6426 (__v16si) __A,
6427 (__v16si) __B,
6428 (__mmask16) __U);
6431 extern __inline __m512d
6432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6433 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6435 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6436 /* idx */ ,
6437 (__v8df) __A,
6438 (__v8df) __B,
6439 (__mmask8) -1);
6442 extern __inline __m512d
6443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6444 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6445 __m512d __B)
6447 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6448 /* idx */ ,
6449 (__v8df) __A,
6450 (__v8df) __B,
6451 (__mmask8) __U);
6454 extern __inline __m512d
6455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6457 __m512d __B)
6459 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6460 (__v8di) __I
6461 /* idx */ ,
6462 (__v8df) __B,
6463 (__mmask8) __U);
6466 extern __inline __m512d
6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6469 __m512d __B)
6471 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6472 /* idx */ ,
6473 (__v8df) __A,
6474 (__v8df) __B,
6475 (__mmask8) __U);
6478 extern __inline __m512
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6482 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6483 /* idx */ ,
6484 (__v16sf) __A,
6485 (__v16sf) __B,
6486 (__mmask16) -1);
6489 extern __inline __m512
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6493 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6494 /* idx */ ,
6495 (__v16sf) __A,
6496 (__v16sf) __B,
6497 (__mmask16) __U);
6500 extern __inline __m512
6501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6503 __m512 __B)
6505 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6506 (__v16si) __I
6507 /* idx */ ,
6508 (__v16sf) __B,
6509 (__mmask16) __U);
6512 extern __inline __m512
6513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6514 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6515 __m512 __B)
6517 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6518 /* idx */ ,
6519 (__v16sf) __A,
6520 (__v16sf) __B,
6521 (__mmask16) __U);
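/* A minimal sketch for the two-source permutes above: the index vector is the
   middle argument, and bit 4 of each 32-bit index (bit 3 for the quadword
   forms) selects between the two data operands.  Same assumptions,
   illustrative names.

     __m512i a   = _mm512_set1_epi32 (1);
     __m512i b   = _mm512_set1_epi32 (2);
     __m512i idx = _mm512_set1_epi32 (16);                  // indices 16..31 pick elements of b
     __m512i r   = _mm512_permutex2var_epi32 (a, idx, b);   // every lane becomes 2
*/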
6524 #ifdef __OPTIMIZE__
6525 extern __inline __m512d
6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6527 _mm512_permute_pd (__m512d __X, const int __C)
6529 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6530 (__v8df)
6531 _mm512_undefined_pd (),
6532 (__mmask8) -1);
6535 extern __inline __m512d
6536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6537 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6539 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6540 (__v8df) __W,
6541 (__mmask8) __U);
6544 extern __inline __m512d
6545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6546 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6548 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6549 (__v8df)
6550 _mm512_setzero_pd (),
6551 (__mmask8) __U);
6554 extern __inline __m512
6555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556 _mm512_permute_ps (__m512 __X, const int __C)
6558 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6559 (__v16sf)
6560 _mm512_undefined_ps (),
6561 (__mmask16) -1);
6564 extern __inline __m512
6565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6566 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6568 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6569 (__v16sf) __W,
6570 (__mmask16) __U);
6573 extern __inline __m512
6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6575 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6577 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6578 (__v16sf)
6579 _mm512_setzero_ps (),
6580 (__mmask16) __U);
6582 #else
6583 #define _mm512_permute_pd(X, C) \
6584 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6585 (__v8df)(__m512d)_mm512_undefined_pd(),\
6586 (__mmask8)(-1)))
6588 #define _mm512_mask_permute_pd(W, U, X, C) \
6589 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6590 (__v8df)(__m512d)(W), \
6591 (__mmask8)(U)))
6593 #define _mm512_maskz_permute_pd(U, X, C) \
6594 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6595 (__v8df)(__m512d)_mm512_setzero_pd(), \
6596 (__mmask8)(U)))
6598 #define _mm512_permute_ps(X, C) \
6599 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6600 (__v16sf)(__m512)_mm512_undefined_ps(),\
6601 (__mmask16)(-1)))
6603 #define _mm512_mask_permute_ps(W, U, X, C) \
6604 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6605 (__v16sf)(__m512)(W), \
6606 (__mmask16)(U)))
6608 #define _mm512_maskz_permute_ps(U, X, C) \
6609 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6610 (__v16sf)(__m512)_mm512_setzero_ps(), \
6611 (__mmask16)(U)))
6612 #endif
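/* A minimal sketch for the in-lane permutes above; the 8-bit immediate is
   applied to every 128-bit lane of the 512-bit source.  Same assumptions,
   illustrative names.

     __m512 v = _mm512_set_ps (15.f, 14.f, 13.f, 12.f, 11.f, 10.f, 9.f, 8.f,
                               7.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f, 0.f);
     __m512 r = _mm512_permute_ps (v, 0x1B);   // reverses the four floats inside every 128-bit lane
*/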
6614 #ifdef __OPTIMIZE__
6615 extern __inline __m512i
6616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6617 _mm512_permutex_epi64 (__m512i __X, const int __I)
6619 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6620 (__v8di)
6621 _mm512_undefined_epi32 (),
6622 (__mmask8) (-1));
6625 extern __inline __m512i
6626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6627 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6628 __m512i __X, const int __I)
6630 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6631 (__v8di) __W,
6632 (__mmask8) __M);
6635 extern __inline __m512i
6636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6637 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6639 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6640 (__v8di)
6641 _mm512_setzero_si512 (),
6642 (__mmask8) __M);
6645 extern __inline __m512d
6646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6647 _mm512_permutex_pd (__m512d __X, const int __M)
6649 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6650 (__v8df)
6651 _mm512_undefined_pd (),
6652 (__mmask8) -1);
6655 extern __inline __m512d
6656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6657 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6659 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6660 (__v8df) __W,
6661 (__mmask8) __U);
6664 extern __inline __m512d
6665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6666 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6668 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6669 (__v8df)
6670 _mm512_setzero_pd (),
6671 (__mmask8) __U);
6673 #else
6674 #define _mm512_permutex_pd(X, M) \
6675 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6676 (__v8df)(__m512d)_mm512_undefined_pd(),\
6677 (__mmask8)-1))
6679 #define _mm512_mask_permutex_pd(W, U, X, M) \
6680 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6681 (__v8df)(__m512d)(W), (__mmask8)(U)))
6683 #define _mm512_maskz_permutex_pd(U, X, M) \
6684 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6685 (__v8df)(__m512d)_mm512_setzero_pd(),\
6686 (__mmask8)(U)))
6688 #define _mm512_permutex_epi64(X, I) \
6689 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6690 (int)(I), \
6691 (__v8di)(__m512i) \
6692 (_mm512_undefined_epi32 ()),\
6693 (__mmask8)(-1)))
6695 #define _mm512_maskz_permutex_epi64(M, X, I) \
6696 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6697 (int)(I), \
6698 (__v8di)(__m512i) \
6699 (_mm512_setzero_si512 ()),\
6700 (__mmask8)(M)))
6702 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6703 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6704 (int)(I), \
6705 (__v8di)(__m512i)(W), \
6706 (__mmask8)(M)))
6707 #endif
6709 extern __inline __m512i
6710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6711 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6713 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6714 (__v8di) __X,
6715 (__v8di)
6716 _mm512_setzero_si512 (),
6717 __M);
6720 extern __inline __m512i
6721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6722 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6724 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6725 (__v8di) __X,
6726 (__v8di)
6727 _mm512_undefined_epi32 (),
6728 (__mmask8) -1);
6731 extern __inline __m512i
6732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6733 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6734 __m512i __Y)
6736 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6737 (__v8di) __X,
6738 (__v8di) __W,
6739 __M);
6742 extern __inline __m512i
6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6746 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6747 (__v16si) __X,
6748 (__v16si)
6749 _mm512_setzero_si512 (),
6750 __M);
6753 extern __inline __m512i
6754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6755 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6757 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6758 (__v16si) __X,
6759 (__v16si)
6760 _mm512_undefined_epi32 (),
6761 (__mmask16) -1);
6764 extern __inline __m512i
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6767 __m512i __Y)
6769 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6770 (__v16si) __X,
6771 (__v16si) __W,
6772 __M);
6775 extern __inline __m512d
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6779 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6780 (__v8di) __X,
6781 (__v8df)
6782 _mm512_undefined_pd (),
6783 (__mmask8) -1);
6786 extern __inline __m512d
6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6790 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6791 (__v8di) __X,
6792 (__v8df) __W,
6793 (__mmask8) __U);
6796 extern __inline __m512d
6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6800 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6801 (__v8di) __X,
6802 (__v8df)
6803 _mm512_setzero_pd (),
6804 (__mmask8) __U);
6807 extern __inline __m512
6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6811 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6812 (__v16si) __X,
6813 (__v16sf)
6814 _mm512_undefined_ps (),
6815 (__mmask16) -1);
6818 extern __inline __m512
6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6820 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6822 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6823 (__v16si) __X,
6824 (__v16sf) __W,
6825 (__mmask16) __U);
6828 extern __inline __m512
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6832 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6833 (__v16si) __X,
6834 (__v16sf)
6835 _mm512_setzero_ps (),
6836 (__mmask16) __U);
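/* A minimal sketch for the full-width variable permutes above; unlike the
   vpermilps-style intrinsics, the indices may cross 128-bit lane boundaries.
   Same assumptions, illustrative names.

     __m512i idx = _mm512_set_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15);   // element i holds 15 - i
     __m512  v   = _mm512_set1_ps (1.0f);
     __m512  r   = _mm512_permutexvar_ps (idx, v);   // r[i] = v[idx[i]]
*/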
6839 #ifdef __OPTIMIZE__
6840 extern __inline __m512
6841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6842 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6844 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6845 (__v16sf) __V, __imm,
6846 (__v16sf)
6847 _mm512_undefined_ps (),
6848 (__mmask16) -1);
6851 extern __inline __m512
6852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6853 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6854 __m512 __V, const int __imm)
6856 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6857 (__v16sf) __V, __imm,
6858 (__v16sf) __W,
6859 (__mmask16) __U);
6862 extern __inline __m512
6863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6864 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6866 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6867 (__v16sf) __V, __imm,
6868 (__v16sf)
6869 _mm512_setzero_ps (),
6870 (__mmask16) __U);
6873 extern __inline __m512d
6874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6875 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6877 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6878 (__v8df) __V, __imm,
6879 (__v8df)
6880 _mm512_undefined_pd (),
6881 (__mmask8) -1);
6884 extern __inline __m512d
6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6886 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6887 __m512d __V, const int __imm)
6889 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6890 (__v8df) __V, __imm,
6891 (__v8df) __W,
6892 (__mmask8) __U);
6895 extern __inline __m512d
6896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6897 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6898 const int __imm)
6900 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6901 (__v8df) __V, __imm,
6902 (__v8df)
6903 _mm512_setzero_pd (),
6904 (__mmask8) __U);
6907 extern __inline __m512d
6908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6910 const int __imm, const int __R)
6912 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6913 (__v8df) __B,
6914 (__v8di) __C,
6915 __imm,
6916 (__mmask8) -1, __R);
6919 extern __inline __m512d
6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6922 __m512i __C, const int __imm, const int __R)
6924 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6925 (__v8df) __B,
6926 (__v8di) __C,
6927 __imm,
6928 (__mmask8) __U, __R);
6931 extern __inline __m512d
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6934 __m512i __C, const int __imm, const int __R)
6936 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6937 (__v8df) __B,
6938 (__v8di) __C,
6939 __imm,
6940 (__mmask8) __U, __R);
6943 extern __inline __m512
6944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6945 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6946 const int __imm, const int __R)
6948 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6949 (__v16sf) __B,
6950 (__v16si) __C,
6951 __imm,
6952 (__mmask16) -1, __R);
6955 extern __inline __m512
6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6958 __m512i __C, const int __imm, const int __R)
6960 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6961 (__v16sf) __B,
6962 (__v16si) __C,
6963 __imm,
6964 (__mmask16) __U, __R);
6967 extern __inline __m512
6968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6969 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6970 __m512i __C, const int __imm, const int __R)
6972 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6973 (__v16sf) __B,
6974 (__v16si) __C,
6975 __imm,
6976 (__mmask16) __U, __R);
6979 extern __inline __m128d
6980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6981 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6982 const int __imm, const int __R)
6984 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6985 (__v2df) __B,
6986 (__v2di) __C, __imm,
6987 (__mmask8) -1, __R);
6990 extern __inline __m128d
6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6993 __m128i __C, const int __imm, const int __R)
6995 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6996 (__v2df) __B,
6997 (__v2di) __C, __imm,
6998 (__mmask8) __U, __R);
7001 extern __inline __m128d
7002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7003 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
7004 __m128i __C, const int __imm, const int __R)
7006 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
7007 (__v2df) __B,
7008 (__v2di) __C,
7009 __imm,
7010 (__mmask8) __U, __R);
7013 extern __inline __m128
7014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7015 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
7016 const int __imm, const int __R)
7018 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7019 (__v4sf) __B,
7020 (__v4si) __C, __imm,
7021 (__mmask8) -1, __R);
7024 extern __inline __m128
7025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7026 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
7027 __m128i __C, const int __imm, const int __R)
7029 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
7030 (__v4sf) __B,
7031 (__v4si) __C, __imm,
7032 (__mmask8) __U, __R);
7035 extern __inline __m128
7036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7037 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
7038 __m128i __C, const int __imm, const int __R)
7040 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
7041 (__v4sf) __B,
7042 (__v4si) __C, __imm,
7043 (__mmask8) __U, __R);
7046 #else
7047 #define _mm512_shuffle_pd(X, Y, C) \
7048 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7049 (__v8df)(__m512d)(Y), (int)(C),\
7050 (__v8df)(__m512d)_mm512_undefined_pd(),\
7051 (__mmask8)-1))
7053 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
7054 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7055 (__v8df)(__m512d)(Y), (int)(C),\
7056 (__v8df)(__m512d)(W),\
7057 (__mmask8)(U)))
7059 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
7060 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
7061 (__v8df)(__m512d)(Y), (int)(C),\
7062 (__v8df)(__m512d)_mm512_setzero_pd(),\
7063 (__mmask8)(U)))
7065 #define _mm512_shuffle_ps(X, Y, C) \
7066 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7067 (__v16sf)(__m512)(Y), (int)(C),\
7068 (__v16sf)(__m512)_mm512_undefined_ps(),\
7069 (__mmask16)-1))
7071 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
7072 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7073 (__v16sf)(__m512)(Y), (int)(C),\
7074 (__v16sf)(__m512)(W),\
7075 (__mmask16)(U)))
7077 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
7078 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
7079 (__v16sf)(__m512)(Y), (int)(C),\
7080 (__v16sf)(__m512)_mm512_setzero_ps(),\
7081 (__mmask16)(U)))
7083 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
7084 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
7085 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7086 (__mmask8)(-1), (R)))
7088 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
7089 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
7090 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7091 (__mmask8)(U), (R)))
7093 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
7094 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
7095 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
7096 (__mmask8)(U), (R)))
7098 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
7099 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
7100 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7101 (__mmask16)(-1), (R)))
7103 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
7104 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
7105 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7106 (__mmask16)(U), (R)))
7108 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
7109 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
7110 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
7111 (__mmask16)(U), (R)))
7113 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
7114 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7115 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7116 (__mmask8)(-1), (R)))
7118 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
7119 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7120 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7121 (__mmask8)(U), (R)))
7123 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
7124 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
7125 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7126 (__mmask8)(U), (R)))
7128 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
7129 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7130 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7131 (__mmask8)(-1), (R)))
7133 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
7134 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7135 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7136 (__mmask8)(U), (R)))
7138 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
7139 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
7140 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7141 (__mmask8)(U), (R)))
7142 #endif
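/* Usage sketch (illustrative, not part of the header): the AVX-512
   intrinsics above all follow the same masking convention.  The
   _mask_ form merges: lanes whose mask bit is 0 are copied from the
   first (write-through) operand; the _maskz_ form zeroes them
   instead.  Assuming pre-initialized __m512 values a and b:

       __m512 w  = _mm512_setzero_ps ();
       __m512 r1 = _mm512_mask_shuffle_ps (w, 0x00FF, a, b, 0x44);
       __m512 r2 = _mm512_maskz_shuffle_ps (0x00FF, a, b, 0x44);

   r1 keeps w in lanes 8-15, r2 zeroes them; lanes 0-7 hold the
   shuffle result in both.  */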
7144 extern __inline __m512
7145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7146 _mm512_movehdup_ps (__m512 __A)
7148 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7149 (__v16sf)
7150 _mm512_undefined_ps (),
7151 (__mmask16) -1);
7154 extern __inline __m512
7155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7156 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7158 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7159 (__v16sf) __W,
7160 (__mmask16) __U);
7163 extern __inline __m512
7164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7165 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7167 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7168 (__v16sf)
7169 _mm512_setzero_ps (),
7170 (__mmask16) __U);
7173 extern __inline __m512
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm512_moveldup_ps (__m512 __A)
7177 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7178 (__v16sf)
7179 _mm512_undefined_ps (),
7180 (__mmask16) -1);
7183 extern __inline __m512
7184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7185 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7187 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7188 (__v16sf) __W,
7189 (__mmask16) __U);
7192 extern __inline __m512
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7196 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7197 (__v16sf)
7198 _mm512_setzero_ps (),
7199 (__mmask16) __U);
7202 extern __inline __m512i
7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204 _mm512_or_si512 (__m512i __A, __m512i __B)
7206 return (__m512i) ((__v16su) __A | (__v16su) __B);
7209 extern __inline __m512i
7210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7211 _mm512_or_epi32 (__m512i __A, __m512i __B)
7213 return (__m512i) ((__v16su) __A | (__v16su) __B);
7216 extern __inline __m512i
7217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7218 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7220 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7221 (__v16si) __B,
7222 (__v16si) __W,
7223 (__mmask16) __U);
7226 extern __inline __m512i
7227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7230 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7231 (__v16si) __B,
7232 (__v16si)
7233 _mm512_setzero_si512 (),
7234 (__mmask16) __U);
7237 extern __inline __m512i
7238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239 _mm512_or_epi64 (__m512i __A, __m512i __B)
7241 return (__m512i) ((__v8du) __A | (__v8du) __B);
7244 extern __inline __m512i
7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7246 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7248 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7249 (__v8di) __B,
7250 (__v8di) __W,
7251 (__mmask8) __U);
7254 extern __inline __m512i
7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7258 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7259 (__v8di) __B,
7260 (__v8di)
7261 _mm512_setzero_si512 (),
7262 (__mmask8) __U);
7265 extern __inline __m512i
7266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7267 _mm512_xor_si512 (__m512i __A, __m512i __B)
7269 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7272 extern __inline __m512i
7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274 _mm512_xor_epi32 (__m512i __A, __m512i __B)
7276 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7279 extern __inline __m512i
7280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7281 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7283 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7284 (__v16si) __B,
7285 (__v16si) __W,
7286 (__mmask16) __U);
7289 extern __inline __m512i
7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7293 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7294 (__v16si) __B,
7295 (__v16si)
7296 _mm512_setzero_si512 (),
7297 (__mmask16) __U);
7300 extern __inline __m512i
7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7302 _mm512_xor_epi64 (__m512i __A, __m512i __B)
7304 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
7307 extern __inline __m512i
7308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7309 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7311 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7312 (__v8di) __B,
7313 (__v8di) __W,
7314 (__mmask8) __U);
7317 extern __inline __m512i
7318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7319 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7321 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7322 (__v8di) __B,
7323 (__v8di)
7324 _mm512_setzero_si512 (),
7325 (__mmask8) __U);
7328 #ifdef __OPTIMIZE__
7329 extern __inline __m512i
7330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7331 _mm512_rol_epi32 (__m512i __A, const int __B)
7333 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7334 (__v16si)
7335 _mm512_undefined_epi32 (),
7336 (__mmask16) -1);
7339 extern __inline __m512i
7340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7341 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7343 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7344 (__v16si) __W,
7345 (__mmask16) __U);
7348 extern __inline __m512i
7349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7350 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7352 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7353 (__v16si)
7354 _mm512_setzero_si512 (),
7355 (__mmask16) __U);
7358 extern __inline __m512i
7359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7360 _mm512_ror_epi32 (__m512i __A, int __B)
7362 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7363 (__v16si)
7364 _mm512_undefined_epi32 (),
7365 (__mmask16) -1);
7368 extern __inline __m512i
7369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7370 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7372 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7373 (__v16si) __W,
7374 (__mmask16) __U);
7377 extern __inline __m512i
7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7381 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7382 (__v16si)
7383 _mm512_setzero_si512 (),
7384 (__mmask16) __U);
7387 extern __inline __m512i
7388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7389 _mm512_rol_epi64 (__m512i __A, const int __B)
7391 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7392 (__v8di)
7393 _mm512_undefined_epi32 (),
7394 (__mmask8) -1);
7397 extern __inline __m512i
7398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7399 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7401 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7402 (__v8di) __W,
7403 (__mmask8) __U);
7406 extern __inline __m512i
7407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7408 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7410 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7411 (__v8di)
7412 _mm512_setzero_si512 (),
7413 (__mmask8) __U);
7416 extern __inline __m512i
7417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7418 _mm512_ror_epi64 (__m512i __A, int __B)
7420 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7421 (__v8di)
7422 _mm512_undefined_epi32 (),
7423 (__mmask8) -1);
7426 extern __inline __m512i
7427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7428 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7430 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7431 (__v8di) __W,
7432 (__mmask8) __U);
7435 extern __inline __m512i
7436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7437 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7439 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7440 (__v8di)
7441 _mm512_setzero_si512 (),
7442 (__mmask8) __U);
7445 #else
7446 #define _mm512_rol_epi32(A, B) \
7447 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7448 (int)(B), \
7449 (__v16si)_mm512_undefined_epi32 (), \
7450 (__mmask16)(-1)))
7451 #define _mm512_mask_rol_epi32(W, U, A, B) \
7452 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7453 (int)(B), \
7454 (__v16si)(__m512i)(W), \
7455 (__mmask16)(U)))
7456 #define _mm512_maskz_rol_epi32(U, A, B) \
7457 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7458 (int)(B), \
7459 (__v16si)_mm512_setzero_si512 (), \
7460 (__mmask16)(U)))
7461 #define _mm512_ror_epi32(A, B) \
7462 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7463 (int)(B), \
7464 (__v16si)_mm512_undefined_epi32 (), \
7465 (__mmask16)(-1)))
7466 #define _mm512_mask_ror_epi32(W, U, A, B) \
7467 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7468 (int)(B), \
7469 (__v16si)(__m512i)(W), \
7470 (__mmask16)(U)))
7471 #define _mm512_maskz_ror_epi32(U, A, B) \
7472 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7473 (int)(B), \
7474 (__v16si)_mm512_setzero_si512 (), \
7475 (__mmask16)(U)))
7476 #define _mm512_rol_epi64(A, B) \
7477 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7478 (int)(B), \
7479 (__v8di)_mm512_undefined_epi32 (), \
7480 (__mmask8)(-1)))
7481 #define _mm512_mask_rol_epi64(W, U, A, B) \
7482 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7483 (int)(B), \
7484 (__v8di)(__m512i)(W), \
7485 (__mmask8)(U)))
7486 #define _mm512_maskz_rol_epi64(U, A, B) \
7487 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7488 (int)(B), \
7489 (__v8di)_mm512_setzero_si512 (), \
7490 (__mmask8)(U)))
7492 #define _mm512_ror_epi64(A, B) \
7493 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7494 (int)(B), \
7495 (__v8di)_mm512_undefined_epi32 (), \
7496 (__mmask8)(-1)))
7497 #define _mm512_mask_ror_epi64(W, U, A, B) \
7498 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7499 (int)(B), \
7500 (__v8di)(__m512i)(W), \
7501 (__mmask8)(U)))
7502 #define _mm512_maskz_ror_epi64(U, A, B) \
7503 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7504 (int)(B), \
7505 (__v8di)_mm512_setzero_si512 (), \
7506 (__mmask8)(U)))
7507 #endif
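/* Illustrative note: the rotate intrinsics above require a
   compile-time constant count, which is why non-optimizing builds
   fall back to the macro forms.  A minimal sketch:

       __m512i v = _mm512_set1_epi32 (0x000000FF);
       __m512i r = _mm512_rol_epi32 (v, 8);     every lane -> 0x0000FF00

   The rotate count is effectively taken modulo the element width
   (32 or 64 bits).  */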
7509 extern __inline __m512i
7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511 _mm512_and_si512 (__m512i __A, __m512i __B)
7513 return (__m512i) ((__v16su) __A & (__v16su) __B);
7516 extern __inline __m512i
7517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7518 _mm512_and_epi32 (__m512i __A, __m512i __B)
7520 return (__m512i) ((__v16su) __A & (__v16su) __B);
7523 extern __inline __m512i
7524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7525 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7527 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7528 (__v16si) __B,
7529 (__v16si) __W,
7530 (__mmask16) __U);
7533 extern __inline __m512i
7534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7535 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7537 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7538 (__v16si) __B,
7539 (__v16si)
7540 _mm512_setzero_si512 (),
7541 (__mmask16) __U);
7544 extern __inline __m512i
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm512_and_epi64 (__m512i __A, __m512i __B)
7548 return (__m512i) ((__v8du) __A & (__v8du) __B);
7551 extern __inline __m512i
7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7555 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7556 (__v8di) __B,
7557 (__v8di) __W, __U);
7560 extern __inline __m512i
7561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7562 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7564 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7565 (__v8di) __B,
7566 (__v8di)
7567 _mm512_setzero_si512 (),
7568 __U);
7571 extern __inline __m512i
7572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7573 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7575 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7576 (__v16si) __B,
7577 (__v16si)
7578 _mm512_undefined_epi32 (),
7579 (__mmask16) -1);
7582 extern __inline __m512i
7583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7584 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7586 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7587 (__v16si) __B,
7588 (__v16si)
7589 _mm512_undefined_epi32 (),
7590 (__mmask16) -1);
7593 extern __inline __m512i
7594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7595 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7597 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7598 (__v16si) __B,
7599 (__v16si) __W,
7600 (__mmask16) __U);
7603 extern __inline __m512i
7604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7605 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7607 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7608 (__v16si) __B,
7609 (__v16si)
7610 _mm512_setzero_si512 (),
7611 (__mmask16) __U);
7614 extern __inline __m512i
7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7616 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7618 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7619 (__v8di) __B,
7620 (__v8di)
7621 _mm512_undefined_epi32 (),
7622 (__mmask8) -1);
7625 extern __inline __m512i
7626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7627 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7629 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7630 (__v8di) __B,
7631 (__v8di) __W, __U);
7634 extern __inline __m512i
7635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7636 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7638 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7639 (__v8di) __B,
7640 (__v8di)
7641 _mm512_setzero_si512 (),
7642 __U);
7645 extern __inline __mmask16
7646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7647 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7649 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7650 (__v16si) __B,
7651 (__mmask16) -1);
7654 extern __inline __mmask16
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7658 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7659 (__v16si) __B, __U);
7662 extern __inline __mmask8
7663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7664 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7666 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7667 (__v8di) __B,
7668 (__mmask8) -1);
7671 extern __inline __mmask8
7672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7673 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7675 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7678 extern __inline __mmask16
7679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7680 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7682 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7683 (__v16si) __B,
7684 (__mmask16) -1);
7687 extern __inline __mmask16
7688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7689 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7691 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7692 (__v16si) __B, __U);
7695 extern __inline __mmask8
7696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7697 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7699 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7700 (__v8di) __B,
7701 (__mmask8) -1);
7704 extern __inline __mmask8
7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7706 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7708 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7709 (__v8di) __B, __U);
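/* The absolute-value helpers below use no dedicated instruction:
   they clear the IEEE sign bit with a plain integer AND
   (0x7fffffff per float lane, 0x7fffffffffffffff per double lane).
   Illustrative only:

       __m512 x = _mm512_set1_ps (-3.0f);
       __m512 y = _mm512_abs_ps (x);      every lane of y is 3.0f
*/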
7712 extern __inline __m512
7713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7714 _mm512_abs_ps (__m512 __A)
7716 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7717 _mm512_set1_epi32 (0x7fffffff));
7720 extern __inline __m512
7721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7722 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7724 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7725 _mm512_set1_epi32 (0x7fffffff));
7728 extern __inline __m512d
7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730 _mm512_abs_pd (__m512d __A)
7732 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7733 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7736 extern __inline __m512d
7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7738 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
7740 return (__m512d)
7741 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7742 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7745 extern __inline __m512i
7746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7747 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7749 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7750 (__v16si) __B,
7751 (__v16si)
7752 _mm512_undefined_epi32 (),
7753 (__mmask16) -1);
7756 extern __inline __m512i
7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7758 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7759 __m512i __B)
7761 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7762 (__v16si) __B,
7763 (__v16si) __W,
7764 (__mmask16) __U);
7767 extern __inline __m512i
7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7771 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7772 (__v16si) __B,
7773 (__v16si)
7774 _mm512_setzero_si512 (),
7775 (__mmask16) __U);
7778 extern __inline __m512i
7779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7780 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7782 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7783 (__v8di) __B,
7784 (__v8di)
7785 _mm512_undefined_epi32 (),
7786 (__mmask8) -1);
7789 extern __inline __m512i
7790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7793 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7794 (__v8di) __B,
7795 (__v8di) __W,
7796 (__mmask8) __U);
7799 extern __inline __m512i
7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7803 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7804 (__v8di) __B,
7805 (__v8di)
7806 _mm512_setzero_si512 (),
7807 (__mmask8) __U);
7810 extern __inline __m512i
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7814 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7815 (__v16si) __B,
7816 (__v16si)
7817 _mm512_undefined_epi32 (),
7818 (__mmask16) -1);
7821 extern __inline __m512i
7822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7823 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7824 __m512i __B)
7826 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7827 (__v16si) __B,
7828 (__v16si) __W,
7829 (__mmask16) __U);
7832 extern __inline __m512i
7833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7834 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7836 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7837 (__v16si) __B,
7838 (__v16si)
7839 _mm512_setzero_si512 (),
7840 (__mmask16) __U);
7843 extern __inline __m512i
7844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7845 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7847 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7848 (__v8di) __B,
7849 (__v8di)
7850 _mm512_undefined_epi32 (),
7851 (__mmask8) -1);
7854 extern __inline __m512i
7855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7856 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7858 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7859 (__v8di) __B,
7860 (__v8di) __W,
7861 (__mmask8) __U);
7864 extern __inline __m512i
7865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7866 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7868 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7869 (__v8di) __B,
7870 (__v8di)
7871 _mm512_setzero_si512 (),
7872 (__mmask8) __U);
7875 #ifdef __x86_64__
7876 #ifdef __OPTIMIZE__
7877 extern __inline unsigned long long
7878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7879 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7881 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7884 extern __inline long long
7885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7886 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7888 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7891 extern __inline long long
7892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7893 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7895 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7898 extern __inline unsigned long long
7899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7900 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7902 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7905 extern __inline long long
7906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7907 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7909 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7912 extern __inline long long
7913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7914 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7916 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7918 #else
7919 #define _mm_cvt_roundss_u64(A, B) \
7920 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7922 #define _mm_cvt_roundss_si64(A, B) \
7923 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7925 #define _mm_cvt_roundss_i64(A, B) \
7926 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7928 #define _mm_cvtt_roundss_u64(A, B) \
7929 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7931 #define _mm_cvtt_roundss_i64(A, B) \
7932 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7934 #define _mm_cvtt_roundss_si64(A, B) \
7935 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7936 #endif
7937 #endif
7939 #ifdef __OPTIMIZE__
7940 extern __inline unsigned
7941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7942 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7944 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7947 extern __inline int
7948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7949 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7951 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7954 extern __inline int
7955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7956 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7958 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7961 extern __inline unsigned
7962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7963 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7965 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7968 extern __inline int
7969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7970 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7972 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7975 extern __inline int
7976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7977 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7979 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7981 #else
7982 #define _mm_cvt_roundss_u32(A, B) \
7983 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7985 #define _mm_cvt_roundss_si32(A, B) \
7986 ((int)__builtin_ia32_vcvtss2si32(A, B))
7988 #define _mm_cvt_roundss_i32(A, B) \
7989 ((int)__builtin_ia32_vcvtss2si32(A, B))
7991 #define _mm_cvtt_roundss_u32(A, B) \
7992 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7994 #define _mm_cvtt_roundss_si32(A, B) \
7995 ((int)__builtin_ia32_vcvttss2si32(A, B))
7997 #define _mm_cvtt_roundss_i32(A, B) \
7998 ((int)__builtin_ia32_vcvttss2si32(A, B))
7999 #endif
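/* Usage sketch for the rounding-override scalar conversions above
   (illustrative; the _MM_FROUND_* constants come from <smmintrin.h>
   via <immintrin.h>, not from this file).  The rounding argument
   must be a compile-time constant: either _MM_FROUND_CUR_DIRECTION
   or a rounding mode OR'ed with _MM_FROUND_NO_EXC:

       __m128 x    = _mm_set_ss (2.5f);
       int   down  = _mm_cvt_roundss_i32 (x, _MM_FROUND_TO_NEG_INF
                                             | _MM_FROUND_NO_EXC);
       int   trunc = _mm_cvtt_roundss_i32 (x, _MM_FROUND_NO_EXC);

   down is 2; the _cvtt_ forms always truncate, so trunc is 2 as
   well.  */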
8001 #ifdef __x86_64__
8002 #ifdef __OPTIMIZE__
8003 extern __inline unsigned long long
8004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8005 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
8007 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
8010 extern __inline long long
8011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8012 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
8014 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
8017 extern __inline long long
8018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
8021 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
8024 extern __inline unsigned long long
8025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8026 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
8028 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
8031 extern __inline long long
8032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8033 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
8035 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
8038 extern __inline long long
8039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8040 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
8042 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
8044 #else
8045 #define _mm_cvt_roundsd_u64(A, B) \
8046 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
8048 #define _mm_cvt_roundsd_si64(A, B) \
8049 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
8051 #define _mm_cvt_roundsd_i64(A, B) \
8052 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
8054 #define _mm_cvtt_roundsd_u64(A, B) \
8055 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
8057 #define _mm_cvtt_roundsd_si64(A, B) \
8058 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
8060 #define _mm_cvtt_roundsd_i64(A, B) \
8061 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
8062 #endif
8063 #endif
8065 #ifdef __OPTIMIZE__
8066 extern __inline unsigned
8067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8068 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
8070 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
8073 extern __inline int
8074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
8077 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
8080 extern __inline int
8081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8082 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
8084 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
8087 extern __inline unsigned
8088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8089 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
8091 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
8094 extern __inline int
8095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
8098 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
8101 extern __inline int
8102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8103 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
8105 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
8107 #else
8108 #define _mm_cvt_roundsd_u32(A, B) \
8109 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
8111 #define _mm_cvt_roundsd_si32(A, B) \
8112 ((int)__builtin_ia32_vcvtsd2si32(A, B))
8114 #define _mm_cvt_roundsd_i32(A, B) \
8115 ((int)__builtin_ia32_vcvtsd2si32(A, B))
8117 #define _mm_cvtt_roundsd_u32(A, B) \
8118 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
8120 #define _mm_cvtt_roundsd_si32(A, B) \
8121 ((int)__builtin_ia32_vcvttsd2si32(A, B))
8123 #define _mm_cvtt_roundsd_i32(A, B) \
8124 ((int)__builtin_ia32_vcvttsd2si32(A, B))
8125 #endif
8127 extern __inline __m512d
8128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8129 _mm512_movedup_pd (__m512d __A)
8131 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8132 (__v8df)
8133 _mm512_undefined_pd (),
8134 (__mmask8) -1);
8137 extern __inline __m512d
8138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8139 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8141 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8142 (__v8df) __W,
8143 (__mmask8) __U);
8146 extern __inline __m512d
8147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8148 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8150 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8151 (__v8df)
8152 _mm512_setzero_pd (),
8153 (__mmask8) __U);
8156 extern __inline __m512d
8157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8158 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
8160 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8161 (__v8df) __B,
8162 (__v8df)
8163 _mm512_undefined_pd (),
8164 (__mmask8) -1);
8167 extern __inline __m512d
8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8169 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8171 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8172 (__v8df) __B,
8173 (__v8df) __W,
8174 (__mmask8) __U);
8177 extern __inline __m512d
8178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8181 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8182 (__v8df) __B,
8183 (__v8df)
8184 _mm512_setzero_pd (),
8185 (__mmask8) __U);
8188 extern __inline __m512d
8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
8192 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8193 (__v8df) __B,
8194 (__v8df)
8195 _mm512_undefined_pd (),
8196 (__mmask8) -1);
8199 extern __inline __m512d
8200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8203 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8204 (__v8df) __B,
8205 (__v8df) __W,
8206 (__mmask8) __U);
8209 extern __inline __m512d
8210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8211 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8213 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8214 (__v8df) __B,
8215 (__v8df)
8216 _mm512_setzero_pd (),
8217 (__mmask8) __U);
8220 extern __inline __m512
8221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8222 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
8224 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8225 (__v16sf) __B,
8226 (__v16sf)
8227 _mm512_undefined_ps (),
8228 (__mmask16) -1);
8231 extern __inline __m512
8232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8233 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8235 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8236 (__v16sf) __B,
8237 (__v16sf) __W,
8238 (__mmask16) __U);
8241 extern __inline __m512
8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8243 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8245 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8246 (__v16sf) __B,
8247 (__v16sf)
8248 _mm512_setzero_ps (),
8249 (__mmask16) __U);
8252 #ifdef __OPTIMIZE__
8253 extern __inline __m512d
8254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8255 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
8257 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8258 (__v8df)
8259 _mm512_undefined_pd (),
8260 (__mmask8) -1, __R);
8263 extern __inline __m512d
8264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8265 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8266 const int __R)
8268 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8269 (__v8df) __W,
8270 (__mmask8) __U, __R);
8273 extern __inline __m512d
8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8277 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8278 (__v8df)
8279 _mm512_setzero_pd (),
8280 (__mmask8) __U, __R);
8283 extern __inline __m512
8284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8285 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
8287 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8288 (__v16sf)
8289 _mm512_undefined_ps (),
8290 (__mmask16) -1, __R);
8293 extern __inline __m512
8294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8295 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8296 const int __R)
8298 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8299 (__v16sf) __W,
8300 (__mmask16) __U, __R);
8303 extern __inline __m512
8304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8305 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8307 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8308 (__v16sf)
8309 _mm512_setzero_ps (),
8310 (__mmask16) __U, __R);
8313 extern __inline __m256i
8314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8315 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
8317 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8318 __I,
8319 (__v16hi)
8320 _mm256_undefined_si256 (),
8321 -1);
8324 extern __inline __m256i
8325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8326 _mm512_cvtps_ph (__m512 __A, const int __I)
8328 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8329 __I,
8330 (__v16hi)
8331 _mm256_undefined_si256 (),
8332 -1);
8335 extern __inline __m256i
8336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8337 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8338 const int __I)
8340 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8341 __I,
8342 (__v16hi) __U,
8343 (__mmask16) __W);
8346 extern __inline __m256i
8347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8348 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8350 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8351 __I,
8352 (__v16hi) __U,
8353 (__mmask16) __W);
8356 extern __inline __m256i
8357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8358 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8360 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8361 __I,
8362 (__v16hi)
8363 _mm256_setzero_si256 (),
8364 (__mmask16) __W);
8367 extern __inline __m256i
8368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8371 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8372 __I,
8373 (__v16hi)
8374 _mm256_setzero_si256 (),
8375 (__mmask16) __W);
8377 #else
8378 #define _mm512_cvt_roundps_pd(A, B) \
8379 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
8381 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
8382 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
8384 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
8385 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
8387 #define _mm512_cvt_roundph_ps(A, B) \
8388 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
8390 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
8391 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
8393 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
8394 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
8396 #define _mm512_cvt_roundps_ph(A, I) \
8397 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8398 (__v16hi)_mm256_undefined_si256 (), -1))
8399 #define _mm512_cvtps_ph(A, I) \
8400 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8401 (__v16hi)_mm256_undefined_si256 (), -1))
8402 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
8403 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8404 (__v16hi)(__m256i)(U), (__mmask16) (W)))
8405 #define _mm512_mask_cvtps_ph(U, W, A, I) \
8406 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8407 (__v16hi)(__m256i)(U), (__mmask16) (W)))
8408 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
8409 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8410 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8411 #define _mm512_maskz_cvtps_ph(W, A, I) \
8412 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8413 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8414 #endif
8416 #ifdef __OPTIMIZE__
8417 extern __inline __m256
8418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8419 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8421 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8422 (__v8sf)
8423 _mm256_undefined_ps (),
8424 (__mmask8) -1, __R);
8427 extern __inline __m256
8428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8430 const int __R)
8432 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8433 (__v8sf) __W,
8434 (__mmask8) __U, __R);
8437 extern __inline __m256
8438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8441 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8442 (__v8sf)
8443 _mm256_setzero_ps (),
8444 (__mmask8) __U, __R);
8447 extern __inline __m128
8448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8451 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8452 (__v2df) __B,
8453 __R);
8456 extern __inline __m128d
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8460 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8461 (__v4sf) __B,
8462 __R);
8464 #else
8465 #define _mm512_cvt_roundpd_ps(A, B) \
8466 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
8468 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
8469 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
8471 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
8472 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
8474 #define _mm_cvt_roundsd_ss(A, B, C) \
8475 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
8477 #define _mm_cvt_roundss_sd(A, B, C) \
8478 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
8479 #endif
8481 extern __inline void
8482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8483 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8485 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8488 extern __inline void
8489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8490 _mm512_stream_ps (float *__P, __m512 __A)
8492 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8495 extern __inline void
8496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8497 _mm512_stream_pd (double *__P, __m512d __A)
8499 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8502 extern __inline __m512i
8503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8504 _mm512_stream_load_si512 (void *__P)
8506 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
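/* Illustrative note on the streaming intrinsics above: they carry a
   non-temporal hint, so the data bypasses the cache hierarchy, and
   the pointer must be 64-byte aligned.  A minimal sketch (alignment
   obtained here with C11 _Alignas; any 64-byte-aligned storage
   works):

       _Alignas (64) static double buf[8];
       _mm512_stream_pd (buf, _mm512_set1_pd (1.0));

   A store fence (_mm_sfence) is typically required before other
   threads may safely read the streamed data.  */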
8509 /* Constants for mantissa extraction */
8510 typedef enum
8512 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8513 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8514 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8515 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8516 } _MM_MANTISSA_NORM_ENUM;
8518 typedef enum
8520 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8521 _MM_MANT_SIGN_zero, /* sign = 0 */
8522 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8523 } _MM_MANTISSA_SIGN_ENUM;
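/* Illustrative note: the getmant intrinsics below encode both enums
   into a single immediate as (sign << 2) | norm, as can be seen in
   the builtin calls.  For example,

       __m512d m = _mm512_getmant_round_pd (x, _MM_MANT_NORM_1_2,
                                            _MM_MANT_SIGN_zero,
                                            _MM_FROUND_CUR_DIRECTION);

   returns, per lane, |x| rescaled into [1, 2) (assuming a
   pre-initialized __m512d x; _MM_FROUND_CUR_DIRECTION comes from
   <smmintrin.h>).  */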
8525 #ifdef __OPTIMIZE__
8526 extern __inline __m128
8527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8530 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8531 (__v4sf) __B,
8532 __R);
8535 extern __inline __m128
8536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8537 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8538 __m128 __B, const int __R)
8540 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8541 (__v4sf) __B,
8542 (__v4sf) __W,
8543 (__mmask8) __U, __R);
8546 extern __inline __m128
8547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8548 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8549 const int __R)
8551 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8552 (__v4sf) __B,
8553 (__v4sf)
8554 _mm_setzero_ps (),
8555 (__mmask8) __U, __R);
8558 extern __inline __m128d
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8562 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8563 (__v2df) __B,
8564 __R);
8567 extern __inline __m128d
8568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8569 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8570 __m128d __B, const int __R)
8572 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8573 (__v2df) __B,
8574 (__v2df) __W,
8575 (__mmask8) __U, __R);
8578 extern __inline __m128d
8579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8580 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8581 const int __R)
8583 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8584 (__v2df) __B,
8585 (__v2df)
8586 _mm_setzero_pd (),
8587 (__mmask8) __U, __R);
8590 extern __inline __m512
8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592 _mm512_getexp_round_ps (__m512 __A, const int __R)
8594 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8595 (__v16sf)
8596 _mm512_undefined_ps (),
8597 (__mmask16) -1, __R);
8600 extern __inline __m512
8601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8602 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8603 const int __R)
8605 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8606 (__v16sf) __W,
8607 (__mmask16) __U, __R);
8610 extern __inline __m512
8611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8612 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8614 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8615 (__v16sf)
8616 _mm512_setzero_ps (),
8617 (__mmask16) __U, __R);
8620 extern __inline __m512d
8621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8622 _mm512_getexp_round_pd (__m512d __A, const int __R)
8624 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8625 (__v8df)
8626 _mm512_undefined_pd (),
8627 (__mmask8) -1, __R);
8630 extern __inline __m512d
8631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8632 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8633 const int __R)
8635 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8636 (__v8df) __W,
8637 (__mmask8) __U, __R);
8640 extern __inline __m512d
8641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8644 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8645 (__v8df)
8646 _mm512_setzero_pd (),
8647 (__mmask8) __U, __R);
8650 extern __inline __m512d
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8653 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8655 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8656 (__C << 2) | __B,
8657 _mm512_undefined_pd (),
8658 (__mmask8) -1, __R);
8661 extern __inline __m512d
8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8663 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8664 _MM_MANTISSA_NORM_ENUM __B,
8665 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8667 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8668 (__C << 2) | __B,
8669 (__v8df) __W, __U,
8670 __R);
8673 extern __inline __m512d
8674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8675 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8676 _MM_MANTISSA_NORM_ENUM __B,
8677 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8679 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8680 (__C << 2) | __B,
8681 (__v8df)
8682 _mm512_setzero_pd (),
8683 __U, __R);
8686 extern __inline __m512
8687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8688 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8689 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8691 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8692 (__C << 2) | __B,
8693 _mm512_undefined_ps (),
8694 (__mmask16) -1, __R);
8697 extern __inline __m512
8698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8699 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8700 _MM_MANTISSA_NORM_ENUM __B,
8701 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8703 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8704 (__C << 2) | __B,
8705 (__v16sf) __W, __U,
8706 __R);
8709 extern __inline __m512
8710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8711 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8712 _MM_MANTISSA_NORM_ENUM __B,
8713 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8715 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8716 (__C << 2) | __B,
8717 (__v16sf)
8718 _mm512_setzero_ps (),
8719 __U, __R);
8722 extern __inline __m128d
8723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8724 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8725 _MM_MANTISSA_NORM_ENUM __C,
8726 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8728 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8729 (__v2df) __B,
8730 (__D << 2) | __C,
8731 __R);
8734 extern __inline __m128d
8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8737 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8738 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8740 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8741 (__v2df) __B,
8742 (__D << 2) | __C,
8743 (__v2df) __W,
8744 __U, __R);
8747 extern __inline __m128d
8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8750 _MM_MANTISSA_NORM_ENUM __C,
8751 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8753 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8754 (__v2df) __B,
8755 (__D << 2) | __C,
8756 (__v2df)
8757 _mm_setzero_pd(),
8758 __U, __R);
8761 extern __inline __m128
8762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8763 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8764 _MM_MANTISSA_NORM_ENUM __C,
8765 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8767 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8768 (__v4sf) __B,
8769 (__D << 2) | __C,
8770 __R);
8773 extern __inline __m128
8774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8776 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8777 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8779 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8780 (__v4sf) __B,
8781 (__D << 2) | __C,
8782 (__v4sf) __W,
8783 __U, __R);
8786 extern __inline __m128
8787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8788 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8789 _MM_MANTISSA_NORM_ENUM __C,
8790 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8792 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8793 (__v4sf) __B,
8794 (__D << 2) | __C,
8795 (__v4sf)
8796 _mm_setzero_ps(),
8797 __U, __R);
8800 #else
8801 #define _mm512_getmant_round_pd(X, B, C, R) \
8802 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8803 (int)(((C)<<2) | (B)), \
8804 (__v8df)(__m512d)_mm512_undefined_pd(), \
8805 (__mmask8)-1,\
8806 (R)))
8808 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8809 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8810 (int)(((C)<<2) | (B)), \
8811 (__v8df)(__m512d)(W), \
8812 (__mmask8)(U),\
8813 (R)))
8815 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8816 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8817 (int)(((C)<<2) | (B)), \
8818 (__v8df)(__m512d)_mm512_setzero_pd(), \
8819 (__mmask8)(U),\
8820 (R)))
8821 #define _mm512_getmant_round_ps(X, B, C, R) \
8822 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8823 (int)(((C)<<2) | (B)), \
8824 (__v16sf)(__m512)_mm512_undefined_ps(), \
8825 (__mmask16)-1,\
8826 (R)))
8828 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8829 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8830 (int)(((C)<<2) | (B)), \
8831 (__v16sf)(__m512)(W), \
8832 (__mmask16)(U),\
8833 (R)))
8835 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8836 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8837 (int)(((C)<<2) | (B)), \
8838 (__v16sf)(__m512)_mm512_setzero_ps(), \
8839 (__mmask16)(U),\
8840 (R)))
8841 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8842 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8843 (__v2df)(__m128d)(Y), \
8844 (int)(((D)<<2) | (C)), \
8845 (R)))
8847 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
8848 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8849 (__v2df)(__m128d)(Y), \
8850 (int)(((D)<<2) | (C)), \
8851 (__v2df)(__m128d)(W), \
8852 (__mmask8)(U),\
8853 (R)))
8855 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
8856 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8857 (__v2df)(__m128d)(Y), \
8858 (int)(((D)<<2) | (C)), \
8859 (__v2df)(__m128d)_mm_setzero_pd(), \
8860 (__mmask8)(U),\
8861 (R)))
8863 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8864 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8865 (__v4sf)(__m128)(Y), \
8866 (int)(((D)<<2) | (C)), \
8867 (R)))
8869 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
8870 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
8871 (__v4sf)(__m128)(Y), \
8872 (int)(((D)<<2) | (C)), \
8873 (__v4sf)(__m128)(W), \
8874 (__mmask8)(U),\
8875 (R)))
8877 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
8878 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
8879 (__v4sf)(__m128)(Y), \
8880 (int)(((D)<<2) | (C)), \
8881 (__v4sf)(__m128)_mm_setzero_ps(), \
8882 (__mmask8)(U),\
8883 (R)))
8885 #define _mm_getexp_round_ss(A, B, R) \
8886 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8888 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
8889 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
8891 #define _mm_maskz_getexp_round_ss(U, A, B, C) \
8892 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
8894 #define _mm_getexp_round_sd(A, B, R) \
8895 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8897 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
8898 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
8900 #define _mm_maskz_getexp_round_sd(U, A, B, C) \
8901 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
8904 #define _mm512_getexp_round_ps(A, R) \
8905 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8906 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8908 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8909 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8910 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8912 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8913 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8914 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8916 #define _mm512_getexp_round_pd(A, R) \
8917 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8918 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8920 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8921 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8922 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8924 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8925 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8926 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8927 #endif
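/* Illustrative usage sketch for the getexp/getmant intrinsics above:
   hypothetical variable names, assuming user code that includes
   <immintrin.h> and is compiled with -mavx512f.

     __m512 x = _mm512_set1_ps (24.0f);
     __m512 e = _mm512_getexp_round_ps (x, _MM_FROUND_CUR_DIRECTION);
     // Each lane of e is 4.0f, i.e. floor(log2(24.0)).
*/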
8929 #ifdef __OPTIMIZE__
8930 extern __inline __m512
8931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8934 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8935 (__v16sf)
8936 _mm512_undefined_ps (),
8937 -1, __R);
8940 extern __inline __m512
8941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8942 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8943 const int __imm, const int __R)
8945 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8946 (__v16sf) __A,
8947 (__mmask16) __B, __R);
8950 extern __inline __m512
8951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8952 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8953 const int __imm, const int __R)
8955 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8956 __imm,
8957 (__v16sf)
8958 _mm512_setzero_ps (),
8959 (__mmask16) __A, __R);
8962 extern __inline __m512d
8963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8964 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8966 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8967 (__v8df)
8968 _mm512_undefined_pd (),
8969 -1, __R);
8972 extern __inline __m512d
8973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8974 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8975 __m512d __C, const int __imm, const int __R)
8977 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8978 (__v8df) __A,
8979 (__mmask8) __B, __R);
8982 extern __inline __m512d
8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8985 const int __imm, const int __R)
8987 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8988 __imm,
8989 (__v8df)
8990 _mm512_setzero_pd (),
8991 (__mmask8) __A, __R);
8994 extern __inline __m128
8995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8996 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8998 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8999 (__v4sf) __B, __imm, __R);
9002 extern __inline __m128d
9003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9004 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
9005 const int __R)
9007 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
9008 (__v2df) __B, __imm, __R);
9011 #else
9012 #define _mm512_roundscale_round_ps(A, B, R) \
9013 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
9014 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
9015 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
9016 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
9017 (int)(D), \
9018 (__v16sf)(__m512)(A), \
9019 (__mmask16)(B), R))
9020 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
9021 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
9022 (int)(C), \
9023 (__v16sf)_mm512_setzero_ps(),\
9024 (__mmask16)(A), R))
9025 #define _mm512_roundscale_round_pd(A, B, R) \
9026 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
9027 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
9028 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
9029 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
9030 (int)(D), \
9031 (__v8df)(__m512d)(A), \
9032 (__mmask8)(B), R))
9033 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
9034 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
9035 (int)(C), \
9036 (__v8df)_mm512_setzero_pd(),\
9037 (__mmask8)(A), R))
9038 #define _mm_roundscale_round_ss(A, B, C, R) \
9039 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
9040 (__v4sf)(__m128)(B), (int)(C), R))
9041 #define _mm_roundscale_round_sd(A, B, C, R) \
9042 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
9043 (__v2df)(__m128d)(B), (int)(C), R))
9044 #endif
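/* Illustrative usage sketch: _mm512_roundscale_round_ps rounds each lane
   according to the immediate, and the floor/ceil helpers below are thin
   wrappers that pass _MM_FROUND_FLOOR or _MM_FROUND_CEIL for it.
   Hypothetical variable names:

     __m512 v = _mm512_set1_ps (2.5f);
     __m512 f = _mm512_roundscale_round_ps (v, _MM_FROUND_FLOOR,
                                            _MM_FROUND_CUR_DIRECTION);
     // Every lane of f is 2.0f, matching _mm512_floor_ps (v).
*/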
9046 extern __inline __m512
9047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9048 _mm512_floor_ps (__m512 __A)
9050 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9051 _MM_FROUND_FLOOR,
9052 (__v16sf) __A, -1,
9053 _MM_FROUND_CUR_DIRECTION);
9056 extern __inline __m512d
9057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9058 _mm512_floor_pd (__m512d __A)
9060 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9061 _MM_FROUND_FLOOR,
9062 (__v8df) __A, -1,
9063 _MM_FROUND_CUR_DIRECTION);
9066 extern __inline __m512
9067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9068 _mm512_ceil_ps (__m512 __A)
9070 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9071 _MM_FROUND_CEIL,
9072 (__v16sf) __A, -1,
9073 _MM_FROUND_CUR_DIRECTION);
9076 extern __inline __m512d
9077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9078 _mm512_ceil_pd (__m512d __A)
9080 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9081 _MM_FROUND_CEIL,
9082 (__v8df) __A, -1,
9083 _MM_FROUND_CUR_DIRECTION);
9086 extern __inline __m512
9087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9088 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
9090 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9091 _MM_FROUND_FLOOR,
9092 (__v16sf) __W, __U,
9093 _MM_FROUND_CUR_DIRECTION);
9096 extern __inline __m512d
9097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
9100 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9101 _MM_FROUND_FLOOR,
9102 (__v8df) __W, __U,
9103 _MM_FROUND_CUR_DIRECTION);
9106 extern __inline __m512
9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
9110 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
9111 _MM_FROUND_CEIL,
9112 (__v16sf) __W, __U,
9113 _MM_FROUND_CUR_DIRECTION);
9116 extern __inline __m512d
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9120 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9121 _MM_FROUND_CEIL,
9122 (__v8df) __W, __U,
9123 _MM_FROUND_CUR_DIRECTION);
9126 #ifdef __OPTIMIZE__
9127 extern __inline __m512i
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9131 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9132 (__v16si) __B, __imm,
9133 (__v16si)
9134 _mm512_undefined_epi32 (),
9135 (__mmask16) -1);
9138 extern __inline __m512i
9139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9141 __m512i __B, const int __imm)
9143 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9144 (__v16si) __B, __imm,
9145 (__v16si) __W,
9146 (__mmask16) __U);
9149 extern __inline __m512i
9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9151 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9152 const int __imm)
9154 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9155 (__v16si) __B, __imm,
9156 (__v16si)
9157 _mm512_setzero_si512 (),
9158 (__mmask16) __U);
9161 extern __inline __m512i
9162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9163 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9165 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9166 (__v8di) __B, __imm,
9167 (__v8di)
9168 _mm512_undefined_epi32 (),
9169 (__mmask8) -1);
9172 extern __inline __m512i
9173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9174 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9175 __m512i __B, const int __imm)
9177 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9178 (__v8di) __B, __imm,
9179 (__v8di) __W,
9180 (__mmask8) __U);
9183 extern __inline __m512i
9184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9185 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9186 const int __imm)
9188 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9189 (__v8di) __B, __imm,
9190 (__v8di)
9191 _mm512_setzero_si512 (),
9192 (__mmask8) __U);
9194 #else
9195 #define _mm512_alignr_epi32(X, Y, C) \
9196 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9197 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
9198 (__mmask16)-1))
9200 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
9201 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9202 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
9203 (__mmask16)(U)))
9205 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
9206 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9207 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
9208 (__mmask16)(U)))
9210 #define _mm512_alignr_epi64(X, Y, C) \
9211 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9212 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
9213 (__mmask8)-1))
9215 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
9216 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9217 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
9219 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
9220 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9221 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
9222 (__mmask8)(U)))
9223 #endif
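/* Illustrative usage sketch: _mm512_alignr_epi32 (a, b, n) concatenates a
   (high half) with b (low half), shifts right by n 32-bit elements and keeps
   the low 16 elements, so n == 0 yields b and n == 16 yields a.
   Hypothetical variable names:

     __m512i lo = _mm512_set1_epi32 (1);
     __m512i hi = _mm512_set1_epi32 (2);
     __m512i r  = _mm512_alignr_epi32 (hi, lo, 4);
     // r holds elements 4..15 of lo followed by elements 0..3 of hi.
*/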
9225 extern __inline __mmask16
9226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9227 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9229 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9230 (__v16si) __B,
9231 (__mmask16) -1);
9234 extern __inline __mmask16
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9238 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9239 (__v16si) __B, __U);
9242 extern __inline __mmask8
9243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9244 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9246 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9247 (__v8di) __B, __U);
9250 extern __inline __mmask8
9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9254 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9255 (__v8di) __B,
9256 (__mmask8) -1);
9259 extern __inline __mmask16
9260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9261 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9263 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9264 (__v16si) __B,
9265 (__mmask16) -1);
9268 extern __inline __mmask16
9269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9270 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9272 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9273 (__v16si) __B, __U);
9276 extern __inline __mmask8
9277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9278 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9280 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9281 (__v8di) __B, __U);
9284 extern __inline __mmask8
9285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9286 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9288 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9289 (__v8di) __B,
9290 (__mmask8) -1);
9293 extern __inline __mmask16
9294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9295 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9297 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9298 (__v16si) __Y, 5,
9299 (__mmask16) -1);
9302 extern __inline __mmask16
9303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9304 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9306 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9307 (__v16si) __Y, 5,
9308 (__mmask16) __M);
9311 extern __inline __mmask16
9312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9313 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9315 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9316 (__v16si) __Y, 5,
9317 (__mmask16) __M);
9320 extern __inline __mmask16
9321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9322 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9324 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9325 (__v16si) __Y, 5,
9326 (__mmask16) -1);
9329 extern __inline __mmask8
9330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9331 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9333 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9334 (__v8di) __Y, 5,
9335 (__mmask8) __M);
9338 extern __inline __mmask8
9339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9340 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9342 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9343 (__v8di) __Y, 5,
9344 (__mmask8) -1);
9347 extern __inline __mmask8
9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9351 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9352 (__v8di) __Y, 5,
9353 (__mmask8) __M);
9356 extern __inline __mmask8
9357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9358 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9360 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9361 (__v8di) __Y, 5,
9362 (__mmask8) -1);
9365 extern __inline __mmask16
9366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9367 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9369 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9370 (__v16si) __Y, 2,
9371 (__mmask16) __M);
9374 extern __inline __mmask16
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9378 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9379 (__v16si) __Y, 2,
9380 (__mmask16) -1);
9383 extern __inline __mmask16
9384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9385 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9387 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9388 (__v16si) __Y, 2,
9389 (__mmask16) __M);
9392 extern __inline __mmask16
9393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9394 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9396 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9397 (__v16si) __Y, 2,
9398 (__mmask16) -1);
9401 extern __inline __mmask8
9402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9403 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9405 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9406 (__v8di) __Y, 2,
9407 (__mmask8) __M);
9410 extern __inline __mmask8
9411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9412 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9414 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9415 (__v8di) __Y, 2,
9416 (__mmask8) -1);
9419 extern __inline __mmask8
9420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9421 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9423 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9424 (__v8di) __Y, 2,
9425 (__mmask8) __M);
9428 extern __inline __mmask8
9429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9430 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9432 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9433 (__v8di) __Y, 2,
9434 (__mmask8) -1);
9437 extern __inline __mmask16
9438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9439 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9441 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9442 (__v16si) __Y, 1,
9443 (__mmask16) __M);
9446 extern __inline __mmask16
9447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9450 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9451 (__v16si) __Y, 1,
9452 (__mmask16) -1);
9455 extern __inline __mmask16
9456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9457 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9459 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9460 (__v16si) __Y, 1,
9461 (__mmask16) __M);
9464 extern __inline __mmask16
9465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9468 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9469 (__v16si) __Y, 1,
9470 (__mmask16) -1);
9473 extern __inline __mmask8
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9477 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9478 (__v8di) __Y, 1,
9479 (__mmask8) __M);
9482 extern __inline __mmask8
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9486 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9487 (__v8di) __Y, 1,
9488 (__mmask8) -1);
9491 extern __inline __mmask8
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9495 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9496 (__v8di) __Y, 1,
9497 (__mmask8) __M);
9500 extern __inline __mmask8
9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9504 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9505 (__v8di) __Y, 1,
9506 (__mmask8) -1);
9509 extern __inline __mmask16
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9513 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9514 (__v16si) __Y, 4,
9515 (__mmask16) -1);
9518 extern __inline __mmask16
9519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9522 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9523 (__v16si) __Y, 4,
9524 (__mmask16) __M);
9527 extern __inline __mmask16
9528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9531 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9532 (__v16si) __Y, 4,
9533 (__mmask16) __M);
9536 extern __inline __mmask16
9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9540 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9541 (__v16si) __Y, 4,
9542 (__mmask16) -1);
9545 extern __inline __mmask8
9546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9547 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9549 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9550 (__v8di) __Y, 4,
9551 (__mmask8) __M);
9554 extern __inline __mmask8
9555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9556 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9558 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9559 (__v8di) __Y, 4,
9560 (__mmask8) -1);
9563 extern __inline __mmask8
9564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9565 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9567 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9568 (__v8di) __Y, 4,
9569 (__mmask8) __M);
9572 extern __inline __mmask8
9573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9574 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9576 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9577 (__v8di) __Y, 4,
9578 (__mmask8) -1);
9581 #define _MM_CMPINT_EQ 0x0
9582 #define _MM_CMPINT_LT 0x1
9583 #define _MM_CMPINT_LE 0x2
9584 #define _MM_CMPINT_UNUSED 0x3
9585 #define _MM_CMPINT_NE 0x4
9586 #define _MM_CMPINT_NLT 0x5
9587 #define _MM_CMPINT_GE 0x5
9588 #define _MM_CMPINT_NLE 0x6
9589 #define _MM_CMPINT_GT 0x6
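/* Illustrative usage sketch: the _MM_CMPINT_* predicates above select the
   comparison performed by the generic compare intrinsics defined below;
   passing _MM_CMPINT_LE (predicate 2) is equivalent to the dedicated
   _mm512_cmple_epi32_mask above.  Hypothetical variable names:

     __m512i a = _mm512_set1_epi32 (3);
     __m512i b = _mm512_set1_epi32 (7);
     __mmask16 k = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_LE);
     // k == 0xFFFF, since every lane of a is <= the matching lane of b.
*/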
9591 #ifdef __OPTIMIZE__
9592 extern __inline __mmask16
9593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9594 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9596 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9597 (__mmask8) __B);
9600 extern __inline __mmask16
9601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9602 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9604 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9605 (__mmask8) __B);
9608 extern __inline __mmask8
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9612 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9613 (__v8di) __Y, __P,
9614 (__mmask8) -1);
9617 extern __inline __mmask16
9618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9619 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9621 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9622 (__v16si) __Y, __P,
9623 (__mmask16) -1);
9626 extern __inline __mmask8
9627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9628 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9630 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9631 (__v8di) __Y, __P,
9632 (__mmask8) -1);
9635 extern __inline __mmask16
9636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9637 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9639 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9640 (__v16si) __Y, __P,
9641 (__mmask16) -1);
9644 extern __inline __mmask8
9645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9647 const int __R)
9649 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9650 (__v8df) __Y, __P,
9651 (__mmask8) -1, __R);
9654 extern __inline __mmask16
9655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9656 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9658 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9659 (__v16sf) __Y, __P,
9660 (__mmask16) -1, __R);
9663 extern __inline __mmask8
9664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9665 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9666 const int __P)
9668 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9669 (__v8di) __Y, __P,
9670 (__mmask8) __U);
9673 extern __inline __mmask16
9674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9675 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9676 const int __P)
9678 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9679 (__v16si) __Y, __P,
9680 (__mmask16) __U);
9683 extern __inline __mmask8
9684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9685 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9686 const int __P)
9688 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9689 (__v8di) __Y, __P,
9690 (__mmask8) __U);
9693 extern __inline __mmask16
9694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9695 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9696 const int __P)
9698 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9699 (__v16si) __Y, __P,
9700 (__mmask16) __U);
9703 extern __inline __mmask8
9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9706 const int __P, const int __R)
9708 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9709 (__v8df) __Y, __P,
9710 (__mmask8) __U, __R);
9713 extern __inline __mmask16
9714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9716 const int __P, const int __R)
9718 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9719 (__v16sf) __Y, __P,
9720 (__mmask16) __U, __R);
9723 extern __inline __mmask8
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9727 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9728 (__v2df) __Y, __P,
9729 (__mmask8) -1, __R);
9732 extern __inline __mmask8
9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9735 const int __P, const int __R)
9737 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9738 (__v2df) __Y, __P,
9739 (__mmask8) __M, __R);
9742 extern __inline __mmask8
9743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9744 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9746 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9747 (__v4sf) __Y, __P,
9748 (__mmask8) -1, __R);
9751 extern __inline __mmask8
9752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9753 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9754 const int __P, const int __R)
9756 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9757 (__v4sf) __Y, __P,
9758 (__mmask8) __M, __R);
9761 #else
9762 #define _kshiftli_mask16(X, Y) \
9763 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
9765 #define _kshiftri_mask16(X, Y) \
9766 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
9768 #define _mm512_cmp_epi64_mask(X, Y, P) \
9769 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9770 (__v8di)(__m512i)(Y), (int)(P),\
9771 (__mmask8)-1))
9773 #define _mm512_cmp_epi32_mask(X, Y, P) \
9774 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9775 (__v16si)(__m512i)(Y), (int)(P), \
9776 (__mmask16)-1))
9778 #define _mm512_cmp_epu64_mask(X, Y, P) \
9779 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9780 (__v8di)(__m512i)(Y), (int)(P),\
9781 (__mmask8)-1))
9783 #define _mm512_cmp_epu32_mask(X, Y, P) \
9784 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9785 (__v16si)(__m512i)(Y), (int)(P), \
9786 (__mmask16)-1))
9788 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9789 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9790 (__v8df)(__m512d)(Y), (int)(P),\
9791 (__mmask8)-1, R))
9793 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9794 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9795 (__v16sf)(__m512)(Y), (int)(P),\
9796 (__mmask16)-1, R))
9798 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9799 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9800 (__v8di)(__m512i)(Y), (int)(P),\
9801 (__mmask8)M))
9803 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9804 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9805 (__v16si)(__m512i)(Y), (int)(P), \
9806 (__mmask16)M))
9808 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9809 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9810 (__v8di)(__m512i)(Y), (int)(P),\
9811 (__mmask8)M))
9813 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9814 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9815 (__v16si)(__m512i)(Y), (int)(P), \
9816 (__mmask16)M))
9818 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9819 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9820 (__v8df)(__m512d)(Y), (int)(P),\
9821 (__mmask8)M, R))
9823 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9824 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9825 (__v16sf)(__m512)(Y), (int)(P),\
9826 (__mmask16)M, R))
9828 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9829 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9830 (__v2df)(__m128d)(Y), (int)(P),\
9831 (__mmask8)-1, R))
9833 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9834 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9835 (__v2df)(__m128d)(Y), (int)(P),\
9836 (__mmask8)(M), R))
9838 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9839 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9840 (__v4sf)(__m128)(Y), (int)(P), \
9841 (__mmask8)-1, R))
9843 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9844 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9845 (__v4sf)(__m128)(Y), (int)(P), \
9846 (__mmask8)(M), R))
9847 #endif
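/* Illustrative usage sketch: _kshiftli_mask16 and _kshiftri_mask16 shift a
   16-bit predicate mask as a whole.  Hypothetical variable names:

     __mmask16 k = _cvtu32_mask16 (0x0001);
     __mmask16 s = _kshiftli_mask16 (k, 4);
     // _cvtmask16_u32 (s) == 0x0010.
*/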
9849 #ifdef __OPTIMIZE__
9850 extern __inline __m512
9851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9852 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9854 __m512 __v1_old = _mm512_undefined_ps ();
9855 __mmask16 __mask = 0xFFFF;
9857 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9858 __addr,
9859 (__v16si) __index,
9860 __mask, __scale);
9863 extern __inline __m512
9864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9865 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9866 __m512i __index, void const *__addr, int __scale)
9868 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9869 __addr,
9870 (__v16si) __index,
9871 __mask, __scale);
9874 extern __inline __m512d
9875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9876 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9878 __m512d __v1_old = _mm512_undefined_pd ();
9879 __mmask8 __mask = 0xFF;
9881 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9882 __addr,
9883 (__v8si) __index, __mask,
9884 __scale);
9887 extern __inline __m512d
9888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9890 __m256i __index, void const *__addr, int __scale)
9892 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9893 __addr,
9894 (__v8si) __index,
9895 __mask, __scale);
9898 extern __inline __m256
9899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9900 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9902 __m256 __v1_old = _mm256_undefined_ps ();
9903 __mmask8 __mask = 0xFF;
9905 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9906 __addr,
9907 (__v8di) __index, __mask,
9908 __scale);
9911 extern __inline __m256
9912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9913 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9914 __m512i __index, void const *__addr, int __scale)
9916 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9917 __addr,
9918 (__v8di) __index,
9919 __mask, __scale);
9922 extern __inline __m512d
9923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9924 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9926 __m512d __v1_old = _mm512_undefined_pd ();
9927 __mmask8 __mask = 0xFF;
9929 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9930 __addr,
9931 (__v8di) __index, __mask,
9932 __scale);
9935 extern __inline __m512d
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9938 __m512i __index, void const *__addr, int __scale)
9940 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9941 __addr,
9942 (__v8di) __index,
9943 __mask, __scale);
9946 extern __inline __m512i
9947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9948 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9950 __m512i __v1_old = _mm512_undefined_epi32 ();
9951 __mmask16 __mask = 0xFFFF;
9953 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9954 __addr,
9955 (__v16si) __index,
9956 __mask, __scale);
9959 extern __inline __m512i
9960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9961 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9962 __m512i __index, void const *__addr, int __scale)
9964 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9965 __addr,
9966 (__v16si) __index,
9967 __mask, __scale);
9970 extern __inline __m512i
9971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9972 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9974 __m512i __v1_old = _mm512_undefined_epi32 ();
9975 __mmask8 __mask = 0xFF;
9977 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9978 __addr,
9979 (__v8si) __index, __mask,
9980 __scale);
9983 extern __inline __m512i
9984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9985 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9986 __m256i __index, void const *__addr,
9987 int __scale)
9989 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9990 __addr,
9991 (__v8si) __index,
9992 __mask, __scale);
9995 extern __inline __m256i
9996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9997 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9999 __m256i __v1_old = _mm256_undefined_si256 ();
10000 __mmask8 __mask = 0xFF;
10002 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10003 __addr,
10004 (__v8di) __index,
10005 __mask, __scale);
10008 extern __inline __m256i
10009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10010 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10011 __m512i __index, void const *__addr, int __scale)
10013 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
10014 __addr,
10015 (__v8di) __index,
10016 __mask, __scale);
10019 extern __inline __m512i
10020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10021 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
10023 __m512i __v1_old = _mm512_undefined_epi32 ();
10024 __mmask8 __mask = 0xFF;
10026 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10027 __addr,
10028 (__v8di) __index, __mask,
10029 __scale);
10032 extern __inline __m512i
10033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10034 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
10035 __m512i __index, void const *__addr,
10036 int __scale)
10038 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
10039 __addr,
10040 (__v8di) __index,
10041 __mask, __scale);
10044 extern __inline void
10045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10046 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
10048 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
10049 (__v16si) __index, (__v16sf) __v1, __scale);
10052 extern __inline void
10053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10054 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
10055 __m512i __index, __m512 __v1, int __scale)
10057 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
10058 (__v16sf) __v1, __scale);
10061 extern __inline void
10062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10063 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
10064 int __scale)
10066 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
10067 (__v8si) __index, (__v8df) __v1, __scale);
10070 extern __inline void
10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
10073 __m256i __index, __m512d __v1, int __scale)
10075 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
10076 (__v8df) __v1, __scale);
10079 extern __inline void
10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
10083 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
10084 (__v8di) __index, (__v8sf) __v1, __scale);
10087 extern __inline void
10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
10090 __m512i __index, __m256 __v1, int __scale)
10092 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
10093 (__v8sf) __v1, __scale);
10096 extern __inline void
10097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10098 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
10099 int __scale)
10101 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
10102 (__v8di) __index, (__v8df) __v1, __scale);
10105 extern __inline void
10106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10107 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
10108 __m512i __index, __m512d __v1, int __scale)
10110 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
10111 (__v8df) __v1, __scale);
10114 extern __inline void
10115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10116 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
10117 __m512i __v1, int __scale)
10119 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
10120 (__v16si) __index, (__v16si) __v1, __scale);
10123 extern __inline void
10124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10125 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
10126 __m512i __index, __m512i __v1, int __scale)
10128 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
10129 (__v16si) __v1, __scale);
10132 extern __inline void
10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
10135 __m512i __v1, int __scale)
10137 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
10138 (__v8si) __index, (__v8di) __v1, __scale);
10141 extern __inline void
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
10144 __m256i __index, __m512i __v1, int __scale)
10146 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
10147 (__v8di) __v1, __scale);
10150 extern __inline void
10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
10153 __m256i __v1, int __scale)
10155 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
10156 (__v8di) __index, (__v8si) __v1, __scale);
10159 extern __inline void
10160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10161 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
10162 __m512i __index, __m256i __v1, int __scale)
10164 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
10165 (__v8si) __v1, __scale);
10168 extern __inline void
10169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10170 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
10171 __m512i __v1, int __scale)
10173 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
10174 (__v8di) __index, (__v8di) __v1, __scale);
10177 extern __inline void
10178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
10180 __m512i __index, __m512i __v1, int __scale)
10182 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
10183 (__v8di) __v1, __scale);
10185 #else
10186 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
10187 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
10188 (void const *)ADDR, \
10189 (__v16si)(__m512i)INDEX, \
10190 (__mmask16)0xFFFF, (int)SCALE)
10192 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
10193 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
10194 (void const *)ADDR, \
10195 (__v16si)(__m512i)INDEX, \
10196 (__mmask16)MASK, (int)SCALE)
10198 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
10199 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
10200 (void const *)ADDR, \
10201 (__v8si)(__m256i)INDEX, \
10202 (__mmask8)0xFF, (int)SCALE)
10204 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
10205 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
10206 (void const *)ADDR, \
10207 (__v8si)(__m256i)INDEX, \
10208 (__mmask8)MASK, (int)SCALE)
10210 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
10211 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
10212 (void const *)ADDR, \
10213 (__v8di)(__m512i)INDEX, \
10214 (__mmask8)0xFF, (int)SCALE)
10216 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
10217 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
10218 (void const *)ADDR, \
10219 (__v8di)(__m512i)INDEX, \
10220 (__mmask8)MASK, (int)SCALE)
10222 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
10223 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
10224 (void const *)ADDR, \
10225 (__v8di)(__m512i)INDEX, \
10226 (__mmask8)0xFF, (int)SCALE)
10228 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
10229 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
10230 (void const *)ADDR, \
10231 (__v8di)(__m512i)INDEX, \
10232 (__mmask8)MASK, (int)SCALE)
10234 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
10235 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
10236 (void const *)ADDR, \
10237 (__v16si)(__m512i)INDEX, \
10238 (__mmask16)0xFFFF, (int)SCALE)
10240 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
10241 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
10242 (void const *)ADDR, \
10243 (__v16si)(__m512i)INDEX, \
10244 (__mmask16)MASK, (int)SCALE)
10246 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
10247 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
10248 (void const *)ADDR, \
10249 (__v8si)(__m256i)INDEX, \
10250 (__mmask8)0xFF, (int)SCALE)
10252 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
10253 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
10254 (void const *)ADDR, \
10255 (__v8si)(__m256i)INDEX, \
10256 (__mmask8)MASK, (int)SCALE)
10258 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
10259 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
10260 (void const *)ADDR, \
10261 (__v8di)(__m512i)INDEX, \
10262 (__mmask8)0xFF, (int)SCALE)
10264 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
10265 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
10266 (void const *)ADDR, \
10267 (__v8di)(__m512i)INDEX, \
10268 (__mmask8)MASK, (int)SCALE)
10270 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
10271 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
10272 (void const *)ADDR, \
10273 (__v8di)(__m512i)INDEX, \
10274 (__mmask8)0xFF, (int)SCALE)
10276 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
10277 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
10278 (void const *)ADDR, \
10279 (__v8di)(__m512i)INDEX, \
10280 (__mmask8)MASK, (int)SCALE)
10282 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
10283 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF, \
10284 (__v16si)(__m512i)INDEX, \
10285 (__v16sf)(__m512)V1, (int)SCALE)
10287 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
10288 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK, \
10289 (__v16si)(__m512i)INDEX, \
10290 (__v16sf)(__m512)V1, (int)SCALE)
10292 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
10293 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF, \
10294 (__v8si)(__m256i)INDEX, \
10295 (__v8df)(__m512d)V1, (int)SCALE)
10297 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
10298 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK, \
10299 (__v8si)(__m256i)INDEX, \
10300 (__v8df)(__m512d)V1, (int)SCALE)
10302 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
10303 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF, \
10304 (__v8di)(__m512i)INDEX, \
10305 (__v8sf)(__m256)V1, (int)SCALE)
10307 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
10308 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)MASK, \
10309 (__v8di)(__m512i)INDEX, \
10310 (__v8sf)(__m256)V1, (int)SCALE)
10312 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
10313 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF, \
10314 (__v8di)(__m512i)INDEX, \
10315 (__v8df)(__m512d)V1, (int)SCALE)
10317 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
10318 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK, \
10319 (__v8di)(__m512i)INDEX, \
10320 (__v8df)(__m512d)V1, (int)SCALE)
10322 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
10323 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF, \
10324 (__v16si)(__m512i)INDEX, \
10325 (__v16si)(__m512i)V1, (int)SCALE)
10327 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
10328 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK, \
10329 (__v16si)(__m512i)INDEX, \
10330 (__v16si)(__m512i)V1, (int)SCALE)
10332 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
10333 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF, \
10334 (__v8si)(__m256i)INDEX, \
10335 (__v8di)(__m512i)V1, (int)SCALE)
10337 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
10338 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK, \
10339 (__v8si)(__m256i)INDEX, \
10340 (__v8di)(__m512i)V1, (int)SCALE)
10342 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
10343 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF, \
10344 (__v8di)(__m512i)INDEX, \
10345 (__v8si)(__m256i)V1, (int)SCALE)
10347 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
10348 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK, \
10349 (__v8di)(__m512i)INDEX, \
10350 (__v8si)(__m256i)V1, (int)SCALE)
10352 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
10353 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF, \
10354 (__v8di)(__m512i)INDEX, \
10355 (__v8di)(__m512i)V1, (int)SCALE)
10357 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
10358 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK, \
10359 (__v8di)(__m512i)INDEX, \
10360 (__v8di)(__m512i)V1, (int)SCALE)
10361 #endif
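/* Illustrative usage sketch: the gather/scatter intrinsics above address
   memory as base + index * scale, so float data indexed by element number
   uses a scale of 4.  Hypothetical variable names:

     float table[256] = { 0 };
     __m512i idx = _mm512_set1_epi32 (10);
     __m512  g   = _mm512_i32gather_ps (idx, table, 4);
     // Every lane of g is table[10].
     _mm512_i32scatter_ps (table, idx, g, 4);
*/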
10363 extern __inline __m512d
10364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10365 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
10367 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10368 (__v8df) __W,
10369 (__mmask8) __U);
10372 extern __inline __m512d
10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10374 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
10376 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
10377 (__v8df)
10378 _mm512_setzero_pd (),
10379 (__mmask8) __U);
10382 extern __inline void
10383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10384 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
10386 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
10387 (__mmask8) __U);
10390 extern __inline __m512
10391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10392 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
10394 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10395 (__v16sf) __W,
10396 (__mmask16) __U);
10399 extern __inline __m512
10400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10401 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
10403 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
10404 (__v16sf)
10405 _mm512_setzero_ps (),
10406 (__mmask16) __U);
10409 extern __inline void
10410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10411 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
10413 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
10414 (__mmask16) __U);
10417 extern __inline __m512i
10418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10419 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10421 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10422 (__v8di) __W,
10423 (__mmask8) __U);
10426 extern __inline __m512i
10427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10428 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
10430 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
10431 (__v8di)
10432 _mm512_setzero_si512 (),
10433 (__mmask8) __U);
10436 extern __inline void
10437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10438 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
10440 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
10441 (__mmask8) __U);
10444 extern __inline __m512i
10445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10446 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10448 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10449 (__v16si) __W,
10450 (__mmask16) __U);
10453 extern __inline __m512i
10454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10455 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
10457 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
10458 (__v16si)
10459 _mm512_setzero_si512 (),
10460 (__mmask16) __U);
10463 extern __inline void
10464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10465 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
10467 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
10468 (__mmask16) __U);
10471 extern __inline __m512d
10472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10473 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
10475 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
10476 (__v8df) __W,
10477 (__mmask8) __U);
10480 extern __inline __m512d
10481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10482 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
10484 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
10485 (__v8df)
10486 _mm512_setzero_pd (),
10487 (__mmask8) __U);
10490 extern __inline __m512d
10491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10492 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
10494 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
10495 (__v8df) __W,
10496 (__mmask8) __U);
10499 extern __inline __m512d
10500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10501 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
10503 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
10504 (__v8df)
10505 _mm512_setzero_pd (),
10506 (__mmask8) __U);
10509 extern __inline __m512
10510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10511 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
10513 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
10514 (__v16sf) __W,
10515 (__mmask16) __U);
10518 extern __inline __m512
10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
10522 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
10523 (__v16sf)
10524 _mm512_setzero_ps (),
10525 (__mmask16) __U);
10528 extern __inline __m512
10529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10530 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
10532 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
10533 (__v16sf) __W,
10534 (__mmask16) __U);
10537 extern __inline __m512
10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
10541 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
10542 (__v16sf)
10543 _mm512_setzero_ps (),
10544 (__mmask16) __U);
10547 extern __inline __m512i
10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
10551 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
10552 (__v8di) __W,
10553 (__mmask8) __U);
10556 extern __inline __m512i
10557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10558 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
10560 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
10561 (__v8di)
10562 _mm512_setzero_si512 (),
10563 (__mmask8) __U);
10566 extern __inline __m512i
10567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10568 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
10570 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10571 (__v8di) __W,
10572 (__mmask8) __U);
10575 extern __inline __m512i
10576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10577 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10579 return (__m512i)
10580 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10581 (__v8di)
10582 _mm512_setzero_si512 (),
10583 (__mmask8) __U);
10586 extern __inline __m512i
10587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10588 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10590 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10591 (__v16si) __W,
10592 (__mmask16) __U);
10595 extern __inline __m512i
10596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10597 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10599 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10600 (__v16si)
10601 _mm512_setzero_si512 (),
10602 (__mmask16) __U);
10605 extern __inline __m512i
10606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10607 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10609 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10610 (__v16si) __W,
10611 (__mmask16) __U);
10614 extern __inline __m512i
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10618 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10619 (__v16si)
10620 _mm512_setzero_si512
10621 (), (__mmask16) __U);
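/* Illustrative usage sketch: compress packs the mask-selected elements into
   the low lanes of the destination, while expand performs the inverse and
   spreads consecutive source elements to the mask-selected lanes.
   Hypothetical variable names:

     __m512i v = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                   7, 6, 5, 4, 3, 2, 1, 0);
     __m512i c = _mm512_maskz_compress_epi32 (0xAAAA, v);
     // The low eight lanes of c are 1, 3, 5, ..., 15; the rest are zeroed.
*/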
10624 /* Mask arithmetic operations */
10625 #define _kand_mask16 _mm512_kand
10626 #define _kandn_mask16 _mm512_kandn
10627 #define _knot_mask16 _mm512_knot
10628 #define _kor_mask16 _mm512_kor
10629 #define _kxnor_mask16 _mm512_kxnor
10630 #define _kxor_mask16 _mm512_kxor
10632 extern __inline unsigned char
10633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10634 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
10636 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
10637 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
10640 extern __inline unsigned char
10641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10642 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
10644 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
10645 (__mmask16) __B);
10648 extern __inline unsigned char
10649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10650 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
10652 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
10653 (__mmask16) __B);
10656 extern __inline unsigned int
10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658 _cvtmask16_u32 (__mmask16 __A)
10660 return (unsigned int) __builtin_ia32_kmovw ((__mmask16) __A);
10663 extern __inline __mmask16
10664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10665 _cvtu32_mask16 (unsigned int __A)
10667 return (__mmask16) __builtin_ia32_kmovw ((__mmask16) __A);
10670 extern __inline __mmask16
10671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10672 _load_mask16 (__mmask16 *__A)
10674 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
10677 extern __inline void
10678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679 _store_mask16 (__mmask16 *__A, __mmask16 __B)
10681 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
10684 extern __inline __mmask16
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm512_kand (__mmask16 __A, __mmask16 __B)
10688 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10691 extern __inline __mmask16
10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10695 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
10696 (__mmask16) __B);
10699 extern __inline __mmask16
10700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701 _mm512_kor (__mmask16 __A, __mmask16 __B)
10703 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10706 extern __inline int
10707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10708 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10710 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10711 (__mmask16) __B);
10714 extern __inline int
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10718 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10719 (__mmask16) __B);
10722 extern __inline __mmask16
10723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10724 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10726 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10729 extern __inline __mmask16
10730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10733 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10736 extern __inline __mmask16
10737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10738 _mm512_knot (__mmask16 __A)
10740 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10743 extern __inline __mmask16
10744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10745 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10747 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10750 extern __inline __mmask16
10751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10752 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
10754 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
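/* Usage sketch (editorial illustration, not part of this header): the mask
   intrinsics above combine and test __mmask16 values without leaving the
   mask domain.  The hypothetical helper below reports whether two masks
   select no common lane.  */
static __inline__ int
__avx512f_example_masks_disjoint (__mmask16 __a, __mmask16 __b)
{
  /* _mm512_kand intersects the masks; _mm512_kortestz returns 1 when the
     OR of its two operands is all zero.  */
  return _mm512_kortestz (_mm512_kand (__a, __b), (__mmask16) 0);
}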
10757 #ifdef __OPTIMIZE__
10758 extern __inline __m512i
10759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10760 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10761 const int __imm)
10763 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10764 (__v4si) __D,
10765 __imm,
10766 (__v16si)
10767 _mm512_setzero_si512 (),
10768 __B);
10771 extern __inline __m512
10772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10773 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10774 const int __imm)
10776 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10777 (__v4sf) __D,
10778 __imm,
10779 (__v16sf)
10780 _mm512_setzero_ps (), __B);
10783 extern __inline __m512i
10784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10785 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10786 __m128i __D, const int __imm)
10788 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10789 (__v4si) __D,
10790 __imm,
10791 (__v16si) __A,
10792 __B);
10795 extern __inline __m512
10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10798 __m128 __D, const int __imm)
10800 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10801 (__v4sf) __D,
10802 __imm,
10803 (__v16sf) __A, __B);
10805 #else
10806 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10807 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10808 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10809 (__mmask16)(A)))
10811 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10812 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10813 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10814 (__mmask16)(A)))
10816 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10817 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10818 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10819 (__mmask16)(B)))
10821 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10822 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10823 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10824 (__mmask16)(B)))
10825 #endif
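/* Usage sketch (editorial illustration, not part of this header): the
   insert*32x4 intrinsics overwrite one aligned 128-bit lane of a 512-bit
   vector and then apply the write-mask element-wise; the lane selector must
   be a compile-time constant in the range 0..3.  The helper below is
   hypothetical.  */
static __inline__ __m512
__avx512f_example_insert_low_lane (__m512 __dst, __m128 __part,
				   __mmask16 __keep)
{
  /* Lane 0 of __dst is replaced by __part; elements whose bit in __keep is
     clear keep their previous value from __dst.  */
  return _mm512_mask_insertf32x4 (__dst, __keep, __dst, __part, 0);
}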
10827 extern __inline __m512i
10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829 _mm512_max_epi64 (__m512i __A, __m512i __B)
10831 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10832 (__v8di) __B,
10833 (__v8di)
10834 _mm512_undefined_epi32 (),
10835 (__mmask8) -1);
10838 extern __inline __m512i
10839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10840 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10842 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10843 (__v8di) __B,
10844 (__v8di)
10845 _mm512_setzero_si512 (),
10846 __M);
10849 extern __inline __m512i
10850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10851 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10853 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10854 (__v8di) __B,
10855 (__v8di) __W, __M);
10858 extern __inline __m512i
10859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10860 _mm512_min_epi64 (__m512i __A, __m512i __B)
10862 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10863 (__v8di) __B,
10864 (__v8di)
10865 _mm512_undefined_epi32 (),
10866 (__mmask8) -1);
10869 extern __inline __m512i
10870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10871 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10873 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10874 (__v8di) __B,
10875 (__v8di) __W, __M);
10878 extern __inline __m512i
10879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10880 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10882 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10883 (__v8di) __B,
10884 (__v8di)
10885 _mm512_setzero_si512 (),
10886 __M);
10889 extern __inline __m512i
10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10891 _mm512_max_epu64 (__m512i __A, __m512i __B)
10893 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10894 (__v8di) __B,
10895 (__v8di)
10896 _mm512_undefined_epi32 (),
10897 (__mmask8) -1);
10900 extern __inline __m512i
10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10902 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10904 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10905 (__v8di) __B,
10906 (__v8di)
10907 _mm512_setzero_si512 (),
10908 __M);
10911 extern __inline __m512i
10912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10915 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10916 (__v8di) __B,
10917 (__v8di) __W, __M);
10920 extern __inline __m512i
10921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10922 _mm512_min_epu64 (__m512i __A, __m512i __B)
10924 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10925 (__v8di) __B,
10926 (__v8di)
10927 _mm512_undefined_epi32 (),
10928 (__mmask8) -1);
10931 extern __inline __m512i
10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10935 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10936 (__v8di) __B,
10937 (__v8di) __W, __M);
10940 extern __inline __m512i
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10944 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10945 (__v8di) __B,
10946 (__v8di)
10947 _mm512_setzero_si512 (),
10948 __M);
10951 extern __inline __m512i
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm512_max_epi32 (__m512i __A, __m512i __B)
10955 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10956 (__v16si) __B,
10957 (__v16si)
10958 _mm512_undefined_epi32 (),
10959 (__mmask16) -1);
10962 extern __inline __m512i
10963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10964 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10966 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10967 (__v16si) __B,
10968 (__v16si)
10969 _mm512_setzero_si512 (),
10970 __M);
10973 extern __inline __m512i
10974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10975 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10977 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10978 (__v16si) __B,
10979 (__v16si) __W, __M);
10982 extern __inline __m512i
10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984 _mm512_min_epi32 (__m512i __A, __m512i __B)
10986 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10987 (__v16si) __B,
10988 (__v16si)
10989 _mm512_undefined_epi32 (),
10990 (__mmask16) -1);
10993 extern __inline __m512i
10994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10995 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10997 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10998 (__v16si) __B,
10999 (__v16si)
11000 _mm512_setzero_si512 (),
11001 __M);
11004 extern __inline __m512i
11005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11006 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11008 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
11009 (__v16si) __B,
11010 (__v16si) __W, __M);
11013 extern __inline __m512i
11014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11015 _mm512_max_epu32 (__m512i __A, __m512i __B)
11017 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11018 (__v16si) __B,
11019 (__v16si)
11020 _mm512_undefined_epi32 (),
11021 (__mmask16) -1);
11024 extern __inline __m512i
11025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11026 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11028 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11029 (__v16si) __B,
11030 (__v16si)
11031 _mm512_setzero_si512 (),
11032 __M);
11035 extern __inline __m512i
11036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11037 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11039 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
11040 (__v16si) __B,
11041 (__v16si) __W, __M);
11044 extern __inline __m512i
11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046 _mm512_min_epu32 (__m512i __A, __m512i __B)
11048 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11049 (__v16si) __B,
11050 (__v16si)
11051 _mm512_undefined_epi32 (),
11052 (__mmask16) -1);
11055 extern __inline __m512i
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
11059 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11060 (__v16si) __B,
11061 (__v16si)
11062 _mm512_setzero_si512 (),
11063 __M);
11066 extern __inline __m512i
11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
11070 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
11071 (__v16si) __B,
11072 (__v16si) __W, __M);
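/* Usage sketch (editorial illustration, not part of this header): the packed
   integer min/max intrinsics compose naturally into a clamp.  The helper
   below is hypothetical.  */
static __inline__ __m512i
__avx512f_example_clamp_epi32 (__m512i __x, __m512i __lo, __m512i __hi)
{
  /* Element-wise max (lo, min (x, hi)) bounds every signed 32-bit element
     of __x to the inclusive range [__lo, __hi].  */
  return _mm512_max_epi32 (__lo, _mm512_min_epi32 (__x, __hi));
}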
11075 extern __inline __m512
11076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
11079 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11080 (__v16sf) __B,
11081 (__v16sf)
11082 _mm512_undefined_ps (),
11083 (__mmask16) -1);
11086 extern __inline __m512
11087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11088 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11090 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11091 (__v16sf) __B,
11092 (__v16sf) __W,
11093 (__mmask16) __U);
11096 extern __inline __m512
11097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11098 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
11100 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
11101 (__v16sf) __B,
11102 (__v16sf)
11103 _mm512_setzero_ps (),
11104 (__mmask16) __U);
11107 #ifdef __OPTIMIZE__
11108 extern __inline __m128d
11109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11110 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
11112 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
11113 (__v2df) __B,
11114 __R);
11117 extern __inline __m128d
11118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11119 _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11120 __m128d __B, const int __R)
11122 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11123 (__v2df) __B,
11124 (__v2df) __W,
11125 (__mmask8) __U, __R);
11128 extern __inline __m128d
11129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11130 _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11131 const int __R)
11133 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11134 (__v2df) __B,
11135 (__v2df)
11136 _mm_setzero_pd (),
11137 (__mmask8) __U, __R);
11140 extern __inline __m128
11141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11142 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
11144 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
11145 (__v4sf) __B,
11146 __R);
11149 extern __inline __m128
11150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11151 _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11152 __m128 __B, const int __R)
11154 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11155 (__v4sf) __B,
11156 (__v4sf) __W,
11157 (__mmask8) __U, __R);
11160 extern __inline __m128
11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11162 _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11163 const int __R)
11165 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11166 (__v4sf) __B,
11167 (__v4sf)
11168 _mm_setzero_ps (),
11169 (__mmask8) __U, __R);
11172 extern __inline __m128d
11173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11174 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
11176 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
11177 (__v2df) __B,
11178 __R);
11181 extern __inline __m128d
11182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11183 _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
11184 __m128d __B, const int __R)
11186 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11187 (__v2df) __B,
11188 (__v2df) __W,
11189 (__mmask8) __U, __R);
11192 extern __inline __m128d
11193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11194 _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
11195 const int __R)
11197 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
11198 (__v2df) __B,
11199 (__v2df)
11200 _mm_setzero_pd (),
11201 (__mmask8) __U, __R);
11204 extern __inline __m128
11205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11206 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
11208 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
11209 (__v4sf) __B,
11210 __R);
11213 extern __inline __m128
11214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11215 _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
11216 __m128 __B, const int __R)
11218 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11219 (__v4sf) __B,
11220 (__v4sf) __W,
11221 (__mmask8) __U, __R);
11224 extern __inline __m128
11225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11226 _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
11227 const int __R)
11229 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
11230 (__v4sf) __B,
11231 (__v4sf)
11232 _mm_setzero_ps (),
11233 (__mmask8) __U, __R);
11236 #else
11237 #define _mm_max_round_sd(A, B, C) \
11238 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
11240 #define _mm_mask_max_round_sd(W, U, A, B, C) \
11241 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
11243 #define _mm_maskz_max_round_sd(U, A, B, C) \
11244 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
11246 #define _mm_max_round_ss(A, B, C) \
11247 (__m128)__builtin_ia32_maxss_round(A, B, C)
11249 #define _mm_mask_max_round_ss(W, U, A, B, C) \
11250 (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
11252 #define _mm_maskz_max_round_ss(U, A, B, C) \
11253 (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
11255 #define _mm_min_round_sd(A, B, C) \
11256 (__m128d)__builtin_ia32_minsd_round(A, B, C)
11258 #define _mm_mask_min_round_sd(W, U, A, B, C) \
11259 (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
11261 #define _mm_maskz_min_round_sd(U, A, B, C) \
11262 (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
11264 #define _mm_min_round_ss(A, B, C) \
11265 (__m128)__builtin_ia32_minss_round(A, B, C)
11267 #define _mm_mask_min_round_ss(W, U, A, B, C) \
11268 (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
11270 #define _mm_maskz_min_round_ss(U, A, B, C) \
11271 (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
11273 #endif
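/* Usage sketch (editorial illustration, not part of this header): the scalar
   *_round_* forms take an extra exception/rounding control operand, which
   must be a compile-time constant; _MM_FROUND_NO_EXC performs the operation
   without raising floating-point exceptions.  The helper below is
   hypothetical.  */
static __inline__ __m128d
__avx512f_example_max_sd_noexc (__m128d __a, __m128d __b)
{
  /* Element 0 becomes max (a0, b0) with exceptions suppressed; the upper
     element is copied from __a.  */
  return _mm_max_round_sd (__a, __b, _MM_FROUND_NO_EXC);
}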
11275 extern __inline __m512d
11276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11277 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11279 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11280 (__v8df) __W,
11281 (__mmask8) __U);
11284 extern __inline __m512
11285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11286 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11288 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11289 (__v16sf) __W,
11290 (__mmask16) __U);
11293 extern __inline __m512i
11294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11295 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11297 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11298 (__v8di) __W,
11299 (__mmask8) __U);
11302 extern __inline __m512i
11303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11304 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11306 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11307 (__v16si) __W,
11308 (__mmask16) __U);
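/* Usage sketch (editorial illustration, not part of this header):
   _mm512_mask_blend_* picks, per element, the second source where the mask
   bit is set and the first source where it is clear.  The helper below is
   hypothetical.  */
static __inline__ __m512d
__avx512f_example_blend_halves (__m512d __low_src, __m512d __high_src)
{
  /* Mask 0xF0 selects __high_src for elements 4..7 and __low_src for
     elements 0..3.  */
  return _mm512_mask_blend_pd ((__mmask8) 0xF0, __low_src, __high_src);
}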
11311 #ifdef __OPTIMIZE__
11312 extern __inline __m128d
11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11314 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11316 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11317 (__v2df) __A,
11318 (__v2df) __B,
11319 __R);
11322 extern __inline __m128
11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11326 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11327 (__v4sf) __A,
11328 (__v4sf) __B,
11329 __R);
11332 extern __inline __m128d
11333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11336 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11337 (__v2df) __A,
11338 -(__v2df) __B,
11339 __R);
11342 extern __inline __m128
11343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11344 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11346 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11347 (__v4sf) __A,
11348 -(__v4sf) __B,
11349 __R);
11352 extern __inline __m128d
11353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11354 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11356 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11357 -(__v2df) __A,
11358 (__v2df) __B,
11359 __R);
11362 extern __inline __m128
11363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11364 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11366 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11367 -(__v4sf) __A,
11368 (__v4sf) __B,
11369 __R);
11372 extern __inline __m128d
11373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11374 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
11376 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
11377 -(__v2df) __A,
11378 -(__v2df) __B,
11379 __R);
11382 extern __inline __m128
11383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11384 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
11386 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
11387 -(__v4sf) __A,
11388 -(__v4sf) __B,
11389 __R);
11391 #else
11392 #define _mm_fmadd_round_sd(A, B, C, R) \
11393 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
11395 #define _mm_fmadd_round_ss(A, B, C, R) \
11396 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
11398 #define _mm_fmsub_round_sd(A, B, C, R) \
11399 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
11401 #define _mm_fmsub_round_ss(A, B, C, R) \
11402 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
11404 #define _mm_fnmadd_round_sd(A, B, C, R) \
11405 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
11407 #define _mm_fnmadd_round_ss(A, B, C, R) \
11408 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
11410 #define _mm_fnmsub_round_sd(A, B, C, R) \
11411 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
11413 #define _mm_fnmsub_round_ss(A, B, C, R) \
11414 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
11415 #endif
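/* Usage sketch (editorial illustration, not part of this header): the scalar
   fused multiply-add *_round_* forms compute one result in element 0 with an
   explicit rounding mode, copying the remaining elements from the first
   operand.  The helper below is hypothetical.  */
static __inline__ __m128d
__avx512f_example_fma_to_nearest (__m128d __w, __m128d __a, __m128d __b)
{
  /* Element 0 becomes w0 * a0 + b0, rounded to nearest in a single step
     with exceptions suppressed.  */
  return _mm_fmadd_round_sd (__w, __a, __b,
			     _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}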
11417 #ifdef __OPTIMIZE__
11418 extern __inline int
11419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11420 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
11422 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
11425 extern __inline int
11426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
11429 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
11431 #else
11432 #define _mm_comi_round_ss(A, B, C, D)\
11433 __builtin_ia32_vcomiss(A, B, C, D)
11434 #define _mm_comi_round_sd(A, B, C, D)\
11435 __builtin_ia32_vcomisd(A, B, C, D)
11436 #endif
11438 extern __inline __m512d
11439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11440 _mm512_sqrt_pd (__m512d __A)
11442 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11443 (__v8df)
11444 _mm512_undefined_pd (),
11445 (__mmask8) -1,
11446 _MM_FROUND_CUR_DIRECTION);
11449 extern __inline __m512d
11450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11451 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11453 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11454 (__v8df) __W,
11455 (__mmask8) __U,
11456 _MM_FROUND_CUR_DIRECTION);
11459 extern __inline __m512d
11460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11461 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11463 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11464 (__v8df)
11465 _mm512_setzero_pd (),
11466 (__mmask8) __U,
11467 _MM_FROUND_CUR_DIRECTION);
11470 extern __inline __m512
11471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472 _mm512_sqrt_ps (__m512 __A)
11474 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11475 (__v16sf)
11476 _mm512_undefined_ps (),
11477 (__mmask16) -1,
11478 _MM_FROUND_CUR_DIRECTION);
11481 extern __inline __m512
11482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11483 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11485 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11486 (__v16sf) __W,
11487 (__mmask16) __U,
11488 _MM_FROUND_CUR_DIRECTION);
11491 extern __inline __m512
11492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11493 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11495 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11496 (__v16sf)
11497 _mm512_setzero_ps (),
11498 (__mmask16) __U,
11499 _MM_FROUND_CUR_DIRECTION);
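/* Usage sketch (editorial illustration, not part of this header): the maskz_
   square-root form zeroes lanes whose mask bit is clear, which is handy for
   skipping lanes known to hold invalid inputs.  The helper below is
   hypothetical.  */
static __inline__ __m512
__avx512f_example_sqrt_selected (__mmask16 __valid, __m512 __x)
{
  /* Lanes selected by __valid receive the square root; all others are 0.0f.  */
  return _mm512_maskz_sqrt_ps (__valid, __x);
}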
11502 extern __inline __m512d
11503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504 _mm512_add_pd (__m512d __A, __m512d __B)
11506 return (__m512d) ((__v8df)__A + (__v8df)__B);
11509 extern __inline __m512d
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11513 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11514 (__v8df) __B,
11515 (__v8df) __W,
11516 (__mmask8) __U,
11517 _MM_FROUND_CUR_DIRECTION);
11520 extern __inline __m512d
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11524 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11525 (__v8df) __B,
11526 (__v8df)
11527 _mm512_setzero_pd (),
11528 (__mmask8) __U,
11529 _MM_FROUND_CUR_DIRECTION);
11532 extern __inline __m512
11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534 _mm512_add_ps (__m512 __A, __m512 __B)
11536 return (__m512) ((__v16sf)__A + (__v16sf)__B);
11539 extern __inline __m512
11540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11541 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11543 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11544 (__v16sf) __B,
11545 (__v16sf) __W,
11546 (__mmask16) __U,
11547 _MM_FROUND_CUR_DIRECTION);
11550 extern __inline __m512
11551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11554 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11555 (__v16sf) __B,
11556 (__v16sf)
11557 _mm512_setzero_ps (),
11558 (__mmask16) __U,
11559 _MM_FROUND_CUR_DIRECTION);
11562 extern __inline __m128d
11563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11564 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11566 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11567 (__v2df) __B,
11568 (__v2df) __W,
11569 (__mmask8) __U,
11570 _MM_FROUND_CUR_DIRECTION);
11573 extern __inline __m128d
11574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11575 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11577 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11578 (__v2df) __B,
11579 (__v2df)
11580 _mm_setzero_pd (),
11581 (__mmask8) __U,
11582 _MM_FROUND_CUR_DIRECTION);
11585 extern __inline __m128
11586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11589 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11590 (__v4sf) __B,
11591 (__v4sf) __W,
11592 (__mmask8) __U,
11593 _MM_FROUND_CUR_DIRECTION);
11596 extern __inline __m128
11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11600 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11601 (__v4sf) __B,
11602 (__v4sf)
11603 _mm_setzero_ps (),
11604 (__mmask8) __U,
11605 _MM_FROUND_CUR_DIRECTION);
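/* Usage sketch (editorial illustration, not part of this header): the mask_
   arithmetic forms merge into an existing vector, while the maskz_ forms
   zero the unselected lanes.  The helper below is hypothetical.  */
static __inline__ __m512d
__avx512f_example_add_selected (__m512d __acc, __mmask8 __active,
				__m512d __a, __m512d __b)
{
  /* Lanes selected by __active receive a + b; the rest keep the value
     already present in __acc.  */
  return _mm512_mask_add_pd (__acc, __active, __a, __b);
}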
11608 extern __inline __m512d
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm512_sub_pd (__m512d __A, __m512d __B)
11612 return (__m512d) ((__v8df)__A - (__v8df)__B);
11615 extern __inline __m512d
11616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11617 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11619 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11620 (__v8df) __B,
11621 (__v8df) __W,
11622 (__mmask8) __U,
11623 _MM_FROUND_CUR_DIRECTION);
11626 extern __inline __m512d
11627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11628 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11630 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11631 (__v8df) __B,
11632 (__v8df)
11633 _mm512_setzero_pd (),
11634 (__mmask8) __U,
11635 _MM_FROUND_CUR_DIRECTION);
11638 extern __inline __m512
11639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11640 _mm512_sub_ps (__m512 __A, __m512 __B)
11642 return (__m512) ((__v16sf)__A - (__v16sf)__B);
11645 extern __inline __m512
11646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11647 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11649 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11650 (__v16sf) __B,
11651 (__v16sf) __W,
11652 (__mmask16) __U,
11653 _MM_FROUND_CUR_DIRECTION);
11656 extern __inline __m512
11657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11658 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11660 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11661 (__v16sf) __B,
11662 (__v16sf)
11663 _mm512_setzero_ps (),
11664 (__mmask16) __U,
11665 _MM_FROUND_CUR_DIRECTION);
11668 extern __inline __m128d
11669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11670 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11672 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11673 (__v2df) __B,
11674 (__v2df) __W,
11675 (__mmask8) __U,
11676 _MM_FROUND_CUR_DIRECTION);
11679 extern __inline __m128d
11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11683 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11684 (__v2df) __B,
11685 (__v2df)
11686 _mm_setzero_pd (),
11687 (__mmask8) __U,
11688 _MM_FROUND_CUR_DIRECTION);
11691 extern __inline __m128
11692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11693 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11695 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11696 (__v4sf) __B,
11697 (__v4sf) __W,
11698 (__mmask8) __U,
11699 _MM_FROUND_CUR_DIRECTION);
11702 extern __inline __m128
11703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11704 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11706 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11707 (__v4sf) __B,
11708 (__v4sf)
11709 _mm_setzero_ps (),
11710 (__mmask8) __U,
11711 _MM_FROUND_CUR_DIRECTION);
11714 extern __inline __m512d
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm512_mul_pd (__m512d __A, __m512d __B)
11718 return (__m512d) ((__v8df)__A * (__v8df)__B);
11721 extern __inline __m512d
11722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11723 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11725 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11726 (__v8df) __B,
11727 (__v8df) __W,
11728 (__mmask8) __U,
11729 _MM_FROUND_CUR_DIRECTION);
11732 extern __inline __m512d
11733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11734 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11736 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11737 (__v8df) __B,
11738 (__v8df)
11739 _mm512_setzero_pd (),
11740 (__mmask8) __U,
11741 _MM_FROUND_CUR_DIRECTION);
11744 extern __inline __m512
11745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11746 _mm512_mul_ps (__m512 __A, __m512 __B)
11748 return (__m512) ((__v16sf)__A * (__v16sf)__B);
11751 extern __inline __m512
11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11755 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11756 (__v16sf) __B,
11757 (__v16sf) __W,
11758 (__mmask16) __U,
11759 _MM_FROUND_CUR_DIRECTION);
11762 extern __inline __m512
11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11766 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11767 (__v16sf) __B,
11768 (__v16sf)
11769 _mm512_setzero_ps (),
11770 (__mmask16) __U,
11771 _MM_FROUND_CUR_DIRECTION);
11774 extern __inline __m128d
11775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11776 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11777 __m128d __B)
11779 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11780 (__v2df) __B,
11781 (__v2df) __W,
11782 (__mmask8) __U,
11783 _MM_FROUND_CUR_DIRECTION);
11786 extern __inline __m128d
11787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11788 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11790 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11791 (__v2df) __B,
11792 (__v2df)
11793 _mm_setzero_pd (),
11794 (__mmask8) __U,
11795 _MM_FROUND_CUR_DIRECTION);
11798 extern __inline __m128
11799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11800 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11801 __m128 __B)
11803 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11804 (__v4sf) __B,
11805 (__v4sf) __W,
11806 (__mmask8) __U,
11807 _MM_FROUND_CUR_DIRECTION);
11810 extern __inline __m128
11811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11812 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11814 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11815 (__v4sf) __B,
11816 (__v4sf)
11817 _mm_setzero_ps (),
11818 (__mmask8) __U,
11819 _MM_FROUND_CUR_DIRECTION);
11822 extern __inline __m512d
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm512_div_pd (__m512d __M, __m512d __V)
11826 return (__m512d) ((__v8df)__M / (__v8df)__V);
11829 extern __inline __m512d
11830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11831 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11833 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11834 (__v8df) __V,
11835 (__v8df) __W,
11836 (__mmask8) __U,
11837 _MM_FROUND_CUR_DIRECTION);
11840 extern __inline __m512d
11841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11842 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11844 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11845 (__v8df) __V,
11846 (__v8df)
11847 _mm512_setzero_pd (),
11848 (__mmask8) __U,
11849 _MM_FROUND_CUR_DIRECTION);
11852 extern __inline __m512
11853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11854 _mm512_div_ps (__m512 __A, __m512 __B)
11856 return (__m512) ((__v16sf)__A / (__v16sf)__B);
11859 extern __inline __m512
11860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11861 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11863 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11864 (__v16sf) __B,
11865 (__v16sf) __W,
11866 (__mmask16) __U,
11867 _MM_FROUND_CUR_DIRECTION);
11870 extern __inline __m512
11871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11872 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11874 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11875 (__v16sf) __B,
11876 (__v16sf)
11877 _mm512_setzero_ps (),
11878 (__mmask16) __U,
11879 _MM_FROUND_CUR_DIRECTION);
11882 extern __inline __m128d
11883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11885 __m128d __B)
11887 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11888 (__v2df) __B,
11889 (__v2df) __W,
11890 (__mmask8) __U,
11891 _MM_FROUND_CUR_DIRECTION);
11894 extern __inline __m128d
11895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11896 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11898 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11899 (__v2df) __B,
11900 (__v2df)
11901 _mm_setzero_pd (),
11902 (__mmask8) __U,
11903 _MM_FROUND_CUR_DIRECTION);
11906 extern __inline __m128
11907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11909 __m128 __B)
11911 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11912 (__v4sf) __B,
11913 (__v4sf) __W,
11914 (__mmask8) __U,
11915 _MM_FROUND_CUR_DIRECTION);
11918 extern __inline __m128
11919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11922 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11923 (__v4sf) __B,
11924 (__v4sf)
11925 _mm_setzero_ps (),
11926 (__mmask8) __U,
11927 _MM_FROUND_CUR_DIRECTION);
11930 extern __inline __m512d
11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932 _mm512_max_pd (__m512d __A, __m512d __B)
11934 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11935 (__v8df) __B,
11936 (__v8df)
11937 _mm512_undefined_pd (),
11938 (__mmask8) -1,
11939 _MM_FROUND_CUR_DIRECTION);
11942 extern __inline __m512d
11943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11944 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11946 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11947 (__v8df) __B,
11948 (__v8df) __W,
11949 (__mmask8) __U,
11950 _MM_FROUND_CUR_DIRECTION);
11953 extern __inline __m512d
11954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11955 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11957 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11958 (__v8df) __B,
11959 (__v8df)
11960 _mm512_setzero_pd (),
11961 (__mmask8) __U,
11962 _MM_FROUND_CUR_DIRECTION);
11965 extern __inline __m512
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm512_max_ps (__m512 __A, __m512 __B)
11969 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11970 (__v16sf) __B,
11971 (__v16sf)
11972 _mm512_undefined_ps (),
11973 (__mmask16) -1,
11974 _MM_FROUND_CUR_DIRECTION);
11977 extern __inline __m512
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11981 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11982 (__v16sf) __B,
11983 (__v16sf) __W,
11984 (__mmask16) __U,
11985 _MM_FROUND_CUR_DIRECTION);
11988 extern __inline __m512
11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11992 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11993 (__v16sf) __B,
11994 (__v16sf)
11995 _mm512_setzero_ps (),
11996 (__mmask16) __U,
11997 _MM_FROUND_CUR_DIRECTION);
12000 extern __inline __m128d
12001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12002 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12004 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12005 (__v2df) __B,
12006 (__v2df) __W,
12007 (__mmask8) __U,
12008 _MM_FROUND_CUR_DIRECTION);
12011 extern __inline __m128d
12012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12013 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
12015 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
12016 (__v2df) __B,
12017 (__v2df)
12018 _mm_setzero_pd (),
12019 (__mmask8) __U,
12020 _MM_FROUND_CUR_DIRECTION);
12023 extern __inline __m128
12024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12025 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12027 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12028 (__v4sf) __B,
12029 (__v4sf) __W,
12030 (__mmask8) __U,
12031 _MM_FROUND_CUR_DIRECTION);
12034 extern __inline __m128
12035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
12038 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
12039 (__v4sf) __B,
12040 (__v4sf)
12041 _mm_setzero_ps (),
12042 (__mmask8) __U,
12043 _MM_FROUND_CUR_DIRECTION);
12046 extern __inline __m512d
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048 _mm512_min_pd (__m512d __A, __m512d __B)
12050 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12051 (__v8df) __B,
12052 (__v8df)
12053 _mm512_undefined_pd (),
12054 (__mmask8) -1,
12055 _MM_FROUND_CUR_DIRECTION);
12058 extern __inline __m512d
12059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12062 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12063 (__v8df) __B,
12064 (__v8df) __W,
12065 (__mmask8) __U,
12066 _MM_FROUND_CUR_DIRECTION);
12069 extern __inline __m512d
12070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12071 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
12073 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
12074 (__v8df) __B,
12075 (__v8df)
12076 _mm512_setzero_pd (),
12077 (__mmask8) __U,
12078 _MM_FROUND_CUR_DIRECTION);
12081 extern __inline __m512
12082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12083 _mm512_min_ps (__m512 __A, __m512 __B)
12085 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12086 (__v16sf) __B,
12087 (__v16sf)
12088 _mm512_undefined_ps (),
12089 (__mmask16) -1,
12090 _MM_FROUND_CUR_DIRECTION);
12093 extern __inline __m512
12094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12095 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12097 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12098 (__v16sf) __B,
12099 (__v16sf) __W,
12100 (__mmask16) __U,
12101 _MM_FROUND_CUR_DIRECTION);
12104 extern __inline __m512
12105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
12108 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
12109 (__v16sf) __B,
12110 (__v16sf)
12111 _mm512_setzero_ps (),
12112 (__mmask16) __U,
12113 _MM_FROUND_CUR_DIRECTION);
12116 extern __inline __m128d
12117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12118 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12120 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12121 (__v2df) __B,
12122 (__v2df) __W,
12123 (__mmask8) __U,
12124 _MM_FROUND_CUR_DIRECTION);
12127 extern __inline __m128d
12128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12129 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
12131 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12132 (__v2df) __B,
12133 (__v2df)
12134 _mm_setzero_pd (),
12135 (__mmask8) __U,
12136 _MM_FROUND_CUR_DIRECTION);
12139 extern __inline __m128
12140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12141 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12143 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12144 (__v4sf) __B,
12145 (__v4sf) __W,
12146 (__mmask8) __U,
12147 _MM_FROUND_CUR_DIRECTION);
12150 extern __inline __m128
12151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12152 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
12154 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12155 (__v4sf) __B,
12156 (__v4sf)
12157 _mm_setzero_ps (),
12158 (__mmask8) __U,
12159 _MM_FROUND_CUR_DIRECTION);
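/* Usage sketch (editorial illustration, not part of this header): like their
   integer counterparts, the packed floating-point min/max intrinsics compose
   into a clamp; as with VMINPD/VMAXPD, a NaN input yields the second source
   operand.  The helper below is hypothetical.  */
static __inline__ __m512d
__avx512f_example_clamp_pd (__m512d __x, __m512d __lo, __m512d __hi)
{
  /* Element-wise max (lo, min (x, hi)).  */
  return _mm512_max_pd (__lo, _mm512_min_pd (__x, __hi));
}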
12162 extern __inline __m512d
12163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12164 _mm512_scalef_pd (__m512d __A, __m512d __B)
12166 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12167 (__v8df) __B,
12168 (__v8df)
12169 _mm512_undefined_pd (),
12170 (__mmask8) -1,
12171 _MM_FROUND_CUR_DIRECTION);
12174 extern __inline __m512d
12175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12176 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12178 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12179 (__v8df) __B,
12180 (__v8df) __W,
12181 (__mmask8) __U,
12182 _MM_FROUND_CUR_DIRECTION);
12185 extern __inline __m512d
12186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12187 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
12189 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12190 (__v8df) __B,
12191 (__v8df)
12192 _mm512_setzero_pd (),
12193 (__mmask8) __U,
12194 _MM_FROUND_CUR_DIRECTION);
12197 extern __inline __m512
12198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199 _mm512_scalef_ps (__m512 __A, __m512 __B)
12201 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12202 (__v16sf) __B,
12203 (__v16sf)
12204 _mm512_undefined_ps (),
12205 (__mmask16) -1,
12206 _MM_FROUND_CUR_DIRECTION);
12209 extern __inline __m512
12210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12211 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12213 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12214 (__v16sf) __B,
12215 (__v16sf) __W,
12216 (__mmask16) __U,
12217 _MM_FROUND_CUR_DIRECTION);
12220 extern __inline __m512
12221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12222 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
12224 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12225 (__v16sf) __B,
12226 (__v16sf)
12227 _mm512_setzero_ps (),
12228 (__mmask16) __U,
12229 _MM_FROUND_CUR_DIRECTION);
12232 extern __inline __m128d
12233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12234 _mm_scalef_sd (__m128d __A, __m128d __B)
12236 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
12237 (__v2df) __B,
12238 (__v2df)
12239 _mm_setzero_pd (),
12240 (__mmask8) -1,
12241 _MM_FROUND_CUR_DIRECTION);
12244 extern __inline __m128
12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246 _mm_scalef_ss (__m128 __A, __m128 __B)
12248 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
12249 (__v4sf) __B,
12250 (__v4sf)
12251 _mm_setzero_ps (),
12252 (__mmask8) -1,
12253 _MM_FROUND_CUR_DIRECTION);
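/* Usage sketch (editorial illustration, not part of this header): scalef
   multiplies each element of the first operand by two raised to the floor of
   the corresponding element of the second operand (roughly a * 2^floor(b)),
   giving fast power-of-two scaling.  The helper below is hypothetical.  */
static __inline__ __m512d
__avx512f_example_scale_by_pow2 (__m512d __a, __m512d __exponents)
{
  return _mm512_scalef_pd (__a, __exponents);
}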
12256 extern __inline __m512d
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12260 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12261 (__v8df) __B,
12262 (__v8df) __C,
12263 (__mmask8) -1,
12264 _MM_FROUND_CUR_DIRECTION);
12267 extern __inline __m512d
12268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12271 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12272 (__v8df) __B,
12273 (__v8df) __C,
12274 (__mmask8) __U,
12275 _MM_FROUND_CUR_DIRECTION);
12278 extern __inline __m512d
12279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12282 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12283 (__v8df) __B,
12284 (__v8df) __C,
12285 (__mmask8) __U,
12286 _MM_FROUND_CUR_DIRECTION);
12289 extern __inline __m512d
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12293 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12294 (__v8df) __B,
12295 (__v8df) __C,
12296 (__mmask8) __U,
12297 _MM_FROUND_CUR_DIRECTION);
12300 extern __inline __m512
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12304 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12305 (__v16sf) __B,
12306 (__v16sf) __C,
12307 (__mmask16) -1,
12308 _MM_FROUND_CUR_DIRECTION);
12311 extern __inline __m512
12312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12313 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12315 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12316 (__v16sf) __B,
12317 (__v16sf) __C,
12318 (__mmask16) __U,
12319 _MM_FROUND_CUR_DIRECTION);
12322 extern __inline __m512
12323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12324 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12326 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12327 (__v16sf) __B,
12328 (__v16sf) __C,
12329 (__mmask16) __U,
12330 _MM_FROUND_CUR_DIRECTION);
12333 extern __inline __m512
12334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12335 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12337 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12338 (__v16sf) __B,
12339 (__v16sf) __C,
12340 (__mmask16) __U,
12341 _MM_FROUND_CUR_DIRECTION);
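/* Usage sketch (editorial illustration, not part of this header): the packed
   fused multiply-add computes a*b + c with a single rounding, the building
   block of dot products and polynomial evaluation.  The helper below is
   hypothetical.  */
static __inline__ __m512
__avx512f_example_axpy (__m512 __a, __m512 __x, __m512 __y)
{
  /* One fused a * x + y per float lane.  */
  return _mm512_fmadd_ps (__a, __x, __y);
}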
12344 extern __inline __m512d
12345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12346 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12348 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12349 (__v8df) __B,
12350 -(__v8df) __C,
12351 (__mmask8) -1,
12352 _MM_FROUND_CUR_DIRECTION);
12355 extern __inline __m512d
12356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12357 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12359 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12360 (__v8df) __B,
12361 -(__v8df) __C,
12362 (__mmask8) __U,
12363 _MM_FROUND_CUR_DIRECTION);
12366 extern __inline __m512d
12367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12368 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12370 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12371 (__v8df) __B,
12372 (__v8df) __C,
12373 (__mmask8) __U,
12374 _MM_FROUND_CUR_DIRECTION);
12377 extern __inline __m512d
12378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12379 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12381 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12382 (__v8df) __B,
12383 -(__v8df) __C,
12384 (__mmask8) __U,
12385 _MM_FROUND_CUR_DIRECTION);
12388 extern __inline __m512
12389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12390 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12392 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12393 (__v16sf) __B,
12394 -(__v16sf) __C,
12395 (__mmask16) -1,
12396 _MM_FROUND_CUR_DIRECTION);
12399 extern __inline __m512
12400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12401 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12403 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12404 (__v16sf) __B,
12405 -(__v16sf) __C,
12406 (__mmask16) __U,
12407 _MM_FROUND_CUR_DIRECTION);
12410 extern __inline __m512
12411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12412 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12414 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12415 (__v16sf) __B,
12416 (__v16sf) __C,
12417 (__mmask16) __U,
12418 _MM_FROUND_CUR_DIRECTION);
12421 extern __inline __m512
12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12423 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12425 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12426 (__v16sf) __B,
12427 -(__v16sf) __C,
12428 (__mmask16) __U,
12429 _MM_FROUND_CUR_DIRECTION);
12432 extern __inline __m512d
12433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12434 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12436 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12437 (__v8df) __B,
12438 (__v8df) __C,
12439 (__mmask8) -1,
12440 _MM_FROUND_CUR_DIRECTION);
12443 extern __inline __m512d
12444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12445 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12447 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12448 (__v8df) __B,
12449 (__v8df) __C,
12450 (__mmask8) __U,
12451 _MM_FROUND_CUR_DIRECTION);
12454 extern __inline __m512d
12455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12456 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12458 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12459 (__v8df) __B,
12460 (__v8df) __C,
12461 (__mmask8) __U,
12462 _MM_FROUND_CUR_DIRECTION);
12465 extern __inline __m512d
12466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12467 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12469 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12470 (__v8df) __B,
12471 (__v8df) __C,
12472 (__mmask8) __U,
12473 _MM_FROUND_CUR_DIRECTION);
12476 extern __inline __m512
12477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12478 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12480 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12481 (__v16sf) __B,
12482 (__v16sf) __C,
12483 (__mmask16) -1,
12484 _MM_FROUND_CUR_DIRECTION);
12487 extern __inline __m512
12488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12489 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12491 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12492 (__v16sf) __B,
12493 (__v16sf) __C,
12494 (__mmask16) __U,
12495 _MM_FROUND_CUR_DIRECTION);
12498 extern __inline __m512
12499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12500 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12502 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12503 (__v16sf) __B,
12504 (__v16sf) __C,
12505 (__mmask16) __U,
12506 _MM_FROUND_CUR_DIRECTION);
12509 extern __inline __m512
12510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12511 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12513 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12514 (__v16sf) __B,
12515 (__v16sf) __C,
12516 (__mmask16) __U,
12517 _MM_FROUND_CUR_DIRECTION);
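/* Illustrative usage sketch -- not part of the original header; the
   function and variable names below are hypothetical.  _mm512_fmaddsub_ps
   subtracts __C in the even-indexed lanes and adds it in the odd-indexed
   lanes of the product __A * __B.  In the _mask form, lanes whose mask bit
   is clear keep the value of the first operand; in the _maskz form they
   are zeroed.  */
static __inline __m512
__example_fmaddsub_masked (__m512 __a, __m512 __b, __m512 __c, __mmask16 __k)
{
  /* Lanes selected by __k receive __a*__b -/+ __c; the rest keep __a.  */
  return _mm512_mask_fmaddsub_ps (__a, __k, __b, __c);
}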
12520 extern __inline __m512d
12521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12522 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12524 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12525 (__v8df) __B,
12526 -(__v8df) __C,
12527 (__mmask8) -1,
12528 _MM_FROUND_CUR_DIRECTION);
12531 extern __inline __m512d
12532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12533 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12535 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12536 (__v8df) __B,
12537 -(__v8df) __C,
12538 (__mmask8) __U,
12539 _MM_FROUND_CUR_DIRECTION);
12542 extern __inline __m512d
12543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12544 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12546 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12547 (__v8df) __B,
12548 (__v8df) __C,
12549 (__mmask8) __U,
12550 _MM_FROUND_CUR_DIRECTION);
12553 extern __inline __m512d
12554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12555 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12557 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12558 (__v8df) __B,
12559 -(__v8df) __C,
12560 (__mmask8) __U,
12561 _MM_FROUND_CUR_DIRECTION);
12564 extern __inline __m512
12565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12566 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12568 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12569 (__v16sf) __B,
12570 -(__v16sf) __C,
12571 (__mmask16) -1,
12572 _MM_FROUND_CUR_DIRECTION);
12575 extern __inline __m512
12576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12577 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12579 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12580 (__v16sf) __B,
12581 -(__v16sf) __C,
12582 (__mmask16) __U,
12583 _MM_FROUND_CUR_DIRECTION);
12586 extern __inline __m512
12587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12588 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12590 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12591 (__v16sf) __B,
12592 (__v16sf) __C,
12593 (__mmask16) __U,
12594 _MM_FROUND_CUR_DIRECTION);
12597 extern __inline __m512
12598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12599 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12601 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12602 (__v16sf) __B,
12603 -(__v16sf) __C,
12604 (__mmask16) __U,
12605 _MM_FROUND_CUR_DIRECTION);
12608 extern __inline __m512d
12609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12610 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12612 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12613 (__v8df) __B,
12614 (__v8df) __C,
12615 (__mmask8) -1,
12616 _MM_FROUND_CUR_DIRECTION);
12619 extern __inline __m512d
12620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12621 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12623 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12624 (__v8df) __B,
12625 (__v8df) __C,
12626 (__mmask8) __U,
12627 _MM_FROUND_CUR_DIRECTION);
12630 extern __inline __m512d
12631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12632 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12634 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
12635 (__v8df) __B,
12636 (__v8df) __C,
12637 (__mmask8) __U,
12638 _MM_FROUND_CUR_DIRECTION);
12641 extern __inline __m512d
12642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12643 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12645 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12646 (__v8df) __B,
12647 (__v8df) __C,
12648 (__mmask8) __U,
12649 _MM_FROUND_CUR_DIRECTION);
12652 extern __inline __m512
12653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12654 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12656 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12657 (__v16sf) __B,
12658 (__v16sf) __C,
12659 (__mmask16) -1,
12660 _MM_FROUND_CUR_DIRECTION);
12663 extern __inline __m512
12664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12665 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12667 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12668 (__v16sf) __B,
12669 (__v16sf) __C,
12670 (__mmask16) __U,
12671 _MM_FROUND_CUR_DIRECTION);
12674 extern __inline __m512
12675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12676 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12678 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
12679 (__v16sf) __B,
12680 (__v16sf) __C,
12681 (__mmask16) __U,
12682 _MM_FROUND_CUR_DIRECTION);
12685 extern __inline __m512
12686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12687 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12689 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12690 (__v16sf) __B,
12691 (__v16sf) __C,
12692 (__mmask16) __U,
12693 _MM_FROUND_CUR_DIRECTION);
12696 extern __inline __m512d
12697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12698 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12700 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12701 (__v8df) __B,
12702 -(__v8df) __C,
12703 (__mmask8) -1,
12704 _MM_FROUND_CUR_DIRECTION);
12707 extern __inline __m512d
12708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12709 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12711 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12712 (__v8df) __B,
12713 (__v8df) __C,
12714 (__mmask8) __U,
12715 _MM_FROUND_CUR_DIRECTION);
12718 extern __inline __m512d
12719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12720 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12722 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12723 (__v8df) __B,
12724 (__v8df) __C,
12725 (__mmask8) __U,
12726 _MM_FROUND_CUR_DIRECTION);
12729 extern __inline __m512d
12730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12731 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12733 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12734 (__v8df) __B,
12735 -(__v8df) __C,
12736 (__mmask8) __U,
12737 _MM_FROUND_CUR_DIRECTION);
12740 extern __inline __m512
12741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12742 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12744 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12745 (__v16sf) __B,
12746 -(__v16sf) __C,
12747 (__mmask16) -1,
12748 _MM_FROUND_CUR_DIRECTION);
12751 extern __inline __m512
12752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12753 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12755 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12756 (__v16sf) __B,
12757 (__v16sf) __C,
12758 (__mmask16) __U,
12759 _MM_FROUND_CUR_DIRECTION);
12762 extern __inline __m512
12763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12764 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12766 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12767 (__v16sf) __B,
12768 (__v16sf) __C,
12769 (__mmask16) __U,
12770 _MM_FROUND_CUR_DIRECTION);
12773 extern __inline __m512
12774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12775 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12777 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12778 (__v16sf) __B,
12779 -(__v16sf) __C,
12780 (__mmask16) __U,
12781 _MM_FROUND_CUR_DIRECTION);
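/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  The fnmadd/fnmsub family negates the product before the
   final add or subtract, so _mm512_fnmadd_pd (__a, __b, __c) evaluates
   __c - __a*__b as a single fused operation.  */
static __inline __m512d
__example_fnmadd (__m512d __a, __m512d __b, __m512d __c)
{
  return _mm512_fnmadd_pd (__a, __b, __c);   /* -(__a*__b) + __c */
}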
12784 extern __inline __m256i
12785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12786 _mm512_cvttpd_epi32 (__m512d __A)
12788 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12789 (__v8si)
12790 _mm256_undefined_si256 (),
12791 (__mmask8) -1,
12792 _MM_FROUND_CUR_DIRECTION);
12795 extern __inline __m256i
12796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12797 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12799 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12800 (__v8si) __W,
12801 (__mmask8) __U,
12802 _MM_FROUND_CUR_DIRECTION);
12805 extern __inline __m256i
12806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12807 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12809 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12810 (__v8si)
12811 _mm256_setzero_si256 (),
12812 (__mmask8) __U,
12813 _MM_FROUND_CUR_DIRECTION);
12816 extern __inline __m256i
12817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12818 _mm512_cvttpd_epu32 (__m512d __A)
12820 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12821 (__v8si)
12822 _mm256_undefined_si256 (),
12823 (__mmask8) -1,
12824 _MM_FROUND_CUR_DIRECTION);
12827 extern __inline __m256i
12828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12829 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12831 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12832 (__v8si) __W,
12833 (__mmask8) __U,
12834 _MM_FROUND_CUR_DIRECTION);
12837 extern __inline __m256i
12838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12839 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12841 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12842 (__v8si)
12843 _mm256_setzero_si256 (),
12844 (__mmask8) __U,
12845 _MM_FROUND_CUR_DIRECTION);
12848 extern __inline __m256i
12849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12850 _mm512_cvtpd_epi32 (__m512d __A)
12852 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12853 (__v8si)
12854 _mm256_undefined_si256 (),
12855 (__mmask8) -1,
12856 _MM_FROUND_CUR_DIRECTION);
12859 extern __inline __m256i
12860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12861 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12863 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12864 (__v8si) __W,
12865 (__mmask8) __U,
12866 _MM_FROUND_CUR_DIRECTION);
12869 extern __inline __m256i
12870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12871 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12873 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12874 (__v8si)
12875 _mm256_setzero_si256 (),
12876 (__mmask8) __U,
12877 _MM_FROUND_CUR_DIRECTION);
12880 extern __inline __m256i
12881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12882 _mm512_cvtpd_epu32 (__m512d __A)
12884 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12885 (__v8si)
12886 _mm256_undefined_si256 (),
12887 (__mmask8) -1,
12888 _MM_FROUND_CUR_DIRECTION);
12891 extern __inline __m256i
12892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12893 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12895 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12896 (__v8si) __W,
12897 (__mmask8) __U,
12898 _MM_FROUND_CUR_DIRECTION);
12901 extern __inline __m256i
12902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12903 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12905 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12906 (__v8si)
12907 _mm256_setzero_si256 (),
12908 (__mmask8) __U,
12909 _MM_FROUND_CUR_DIRECTION);
12912 extern __inline __m512i
12913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12914 _mm512_cvttps_epi32 (__m512 __A)
12916 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12917 (__v16si)
12918 _mm512_undefined_epi32 (),
12919 (__mmask16) -1,
12920 _MM_FROUND_CUR_DIRECTION);
12923 extern __inline __m512i
12924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12925 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12927 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12928 (__v16si) __W,
12929 (__mmask16) __U,
12930 _MM_FROUND_CUR_DIRECTION);
12933 extern __inline __m512i
12934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12935 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
12937 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12938 (__v16si)
12939 _mm512_setzero_si512 (),
12940 (__mmask16) __U,
12941 _MM_FROUND_CUR_DIRECTION);
12944 extern __inline __m512i
12945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12946 _mm512_cvttps_epu32 (__m512 __A)
12948 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12949 (__v16si)
12950 _mm512_undefined_epi32 (),
12951 (__mmask16) -1,
12952 _MM_FROUND_CUR_DIRECTION);
12955 extern __inline __m512i
12956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12957 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12959 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12960 (__v16si) __W,
12961 (__mmask16) __U,
12962 _MM_FROUND_CUR_DIRECTION);
12965 extern __inline __m512i
12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12967 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
12969 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12970 (__v16si)
12971 _mm512_setzero_si512 (),
12972 (__mmask16) __U,
12973 _MM_FROUND_CUR_DIRECTION);
12976 extern __inline __m512i
12977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12978 _mm512_cvtps_epi32 (__m512 __A)
12980 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12981 (__v16si)
12982 _mm512_undefined_epi32 (),
12983 (__mmask16) -1,
12984 _MM_FROUND_CUR_DIRECTION);
12987 extern __inline __m512i
12988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12989 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12991 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12992 (__v16si) __W,
12993 (__mmask16) __U,
12994 _MM_FROUND_CUR_DIRECTION);
12997 extern __inline __m512i
12998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12999 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
13001 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
13002 (__v16si)
13003 _mm512_setzero_si512 (),
13004 (__mmask16) __U,
13005 _MM_FROUND_CUR_DIRECTION);
13008 extern __inline __m512i
13009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13010 _mm512_cvtps_epu32 (__m512 __A)
13012 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13013 (__v16si)
13014 _mm512_undefined_epi32 (),
13015 (__mmask16) -1,
13016 _MM_FROUND_CUR_DIRECTION);
13019 extern __inline __m512i
13020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13021 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
13023 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13024 (__v16si) __W,
13025 (__mmask16) __U,
13026 _MM_FROUND_CUR_DIRECTION);
13029 extern __inline __m512i
13030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13031 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
13033 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
13034 (__v16si)
13035 _mm512_setzero_si512 (),
13036 (__mmask16) __U,
13037 _MM_FROUND_CUR_DIRECTION);
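/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  The _cvtt variants truncate toward zero while the plain
   _cvt variants use the current MXCSR rounding mode; the _mask forms
   write only the lanes selected by the mask and keep the old destination
   elsewhere.  */
static __inline __m256i
__example_masked_truncate (__m256i __old, __mmask8 __k, __m512d __v)
{
  /* Lanes with a set bit in __k get the truncated __v; others keep __old. */
  return _mm512_mask_cvttpd_epi32 (__old, __k, __v);
}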
13040 extern __inline double
13041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13042 _mm512_cvtsd_f64 (__m512d __A)
13044 return __A[0];
13047 extern __inline float
13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13049 _mm512_cvtss_f32 (__m512 __A)
13051 return __A[0];
13054 #ifdef __x86_64__
13055 extern __inline __m128
13056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13057 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
13059 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
13060 _MM_FROUND_CUR_DIRECTION);
13063 extern __inline __m128d
13064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13065 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
13067 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
13068 _MM_FROUND_CUR_DIRECTION);
13070 #endif
13072 extern __inline __m128
13073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13074 _mm_cvtu32_ss (__m128 __A, unsigned __B)
13076 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
13077 _MM_FROUND_CUR_DIRECTION);
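/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  The scalar conversions replace only the low element;
   the remaining lanes are copied from the first operand.  */
static __inline __m128
__example_cvtu32_ss (__m128 __passthru, unsigned __x)
{
  /* Low lane becomes (float) __x, lanes 1..3 come from __passthru.  */
  return _mm_cvtu32_ss (__passthru, __x);
}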
13080 extern __inline __m512
13081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13082 _mm512_cvtepi32_ps (__m512i __A)
13084 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13085 (__v16sf)
13086 _mm512_undefined_ps (),
13087 (__mmask16) -1,
13088 _MM_FROUND_CUR_DIRECTION);
13091 extern __inline __m512
13092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13093 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13095 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13096 (__v16sf) __W,
13097 (__mmask16) __U,
13098 _MM_FROUND_CUR_DIRECTION);
13101 extern __inline __m512
13102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13103 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
13105 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
13106 (__v16sf)
13107 _mm512_setzero_ps (),
13108 (__mmask16) __U,
13109 _MM_FROUND_CUR_DIRECTION);
13112 extern __inline __m512
13113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13114 _mm512_cvtepu32_ps (__m512i __A)
13116 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13117 (__v16sf)
13118 _mm512_undefined_ps (),
13119 (__mmask16) -1,
13120 _MM_FROUND_CUR_DIRECTION);
13123 extern __inline __m512
13124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13127 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13128 (__v16sf) __W,
13129 (__mmask16) __U,
13130 _MM_FROUND_CUR_DIRECTION);
13133 extern __inline __m512
13134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13135 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
13137 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13138 (__v16sf)
13139 _mm512_setzero_ps (),
13140 (__mmask16) __U,
13141 _MM_FROUND_CUR_DIRECTION);
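/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  _mm512_cvtepu32_ps treats the source lanes as unsigned,
   so values of 2^31 and above convert correctly, unlike the signed
   _mm512_cvtepi32_ps.  */
static __inline __m512
__example_u32_to_float (__mmask16 __k, __m512i __v)
{
  /* Lanes with a clear bit in __k are zeroed.  */
  return _mm512_maskz_cvtepu32_ps (__k, __v);
}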
13144 #ifdef __OPTIMIZE__
13145 extern __inline __m512d
13146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13147 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
13149 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13150 (__v8df) __B,
13151 (__v8di) __C,
13152 __imm,
13153 (__mmask8) -1,
13154 _MM_FROUND_CUR_DIRECTION);
13157 extern __inline __m512d
13158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13159 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
13160 __m512i __C, const int __imm)
13162 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13163 (__v8df) __B,
13164 (__v8di) __C,
13165 __imm,
13166 (__mmask8) __U,
13167 _MM_FROUND_CUR_DIRECTION);
13170 extern __inline __m512d
13171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13172 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
13173 __m512i __C, const int __imm)
13175 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
13176 (__v8df) __B,
13177 (__v8di) __C,
13178 __imm,
13179 (__mmask8) __U,
13180 _MM_FROUND_CUR_DIRECTION);
13183 extern __inline __m512
13184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13185 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
13187 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13188 (__v16sf) __B,
13189 (__v16si) __C,
13190 __imm,
13191 (__mmask16) -1,
13192 _MM_FROUND_CUR_DIRECTION);
13195 extern __inline __m512
13196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13197 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
13198 __m512i __C, const int __imm)
13200 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13201 (__v16sf) __B,
13202 (__v16si) __C,
13203 __imm,
13204 (__mmask16) __U,
13205 _MM_FROUND_CUR_DIRECTION);
13208 extern __inline __m512
13209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13210 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
13211 __m512i __C, const int __imm)
13213 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
13214 (__v16sf) __B,
13215 (__v16si) __C,
13216 __imm,
13217 (__mmask16) __U,
13218 _MM_FROUND_CUR_DIRECTION);
13221 extern __inline __m128d
13222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13223 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
13225 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13226 (__v2df) __B,
13227 (__v2di) __C, __imm,
13228 (__mmask8) -1,
13229 _MM_FROUND_CUR_DIRECTION);
13232 extern __inline __m128d
13233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13234 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
13235 __m128i __C, const int __imm)
13237 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13238 (__v2df) __B,
13239 (__v2di) __C, __imm,
13240 (__mmask8) __U,
13241 _MM_FROUND_CUR_DIRECTION);
13244 extern __inline __m128d
13245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13246 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
13247 __m128i __C, const int __imm)
13249 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
13250 (__v2df) __B,
13251 (__v2di) __C,
13252 __imm,
13253 (__mmask8) __U,
13254 _MM_FROUND_CUR_DIRECTION);
13257 extern __inline __m128
13258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13259 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
13261 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13262 (__v4sf) __B,
13263 (__v4si) __C, __imm,
13264 (__mmask8) -1,
13265 _MM_FROUND_CUR_DIRECTION);
13268 extern __inline __m128
13269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13270 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13271 __m128i __C, const int __imm)
13273 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13274 (__v4sf) __B,
13275 (__v4si) __C, __imm,
13276 (__mmask8) __U,
13277 _MM_FROUND_CUR_DIRECTION);
13280 extern __inline __m128
13281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13282 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13283 __m128i __C, const int __imm)
13285 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13286 (__v4sf) __B,
13287 (__v4si) __C, __imm,
13288 (__mmask8) __U,
13289 _MM_FROUND_CUR_DIRECTION);
13291 #else
13292 #define _mm512_fixupimm_pd(X, Y, Z, C) \
13293 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13294 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13295 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13297 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
13298 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13299 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13300 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13302 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
13303 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
13304 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13305 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13307 #define _mm512_fixupimm_ps(X, Y, Z, C) \
13308 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13309 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13310 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
13312 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
13313 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13314 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13315 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13317 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
13318 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
13319 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13320 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13322 #define _mm_fixupimm_sd(X, Y, Z, C) \
13323 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13324 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13325 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13327 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
13328 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13329 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13330 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13332 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
13333 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
13334 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13335 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13337 #define _mm_fixupimm_ss(X, Y, Z, C) \
13338 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13339 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13340 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13342 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
13343 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13344 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13345 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13347 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
13348 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
13349 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13350 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13351 #endif
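/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  _mm512_fixupimm_pd classifies each element of the second
   source, looks up a 4-bit token in the corresponding element of the
   integer table, and either keeps the element of the first operand or
   substitutes a special value (0.0, 1.0, NaN, ...) accordingly; the
   immediate, which must be a compile-time constant, selects which
   exceptions are suppressed.  */
static __inline __m512d
__example_fixupimm (__m512d __dflt, __m512d __src, __m512i __tokens)
{
  return _mm512_fixupimm_pd (__dflt, __src, __tokens, 0);
}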
13353 #ifdef __x86_64__
13354 extern __inline unsigned long long
13355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13356 _mm_cvtss_u64 (__m128 __A)
13358 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13359 __A,
13360 _MM_FROUND_CUR_DIRECTION);
13363 extern __inline unsigned long long
13364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13365 _mm_cvttss_u64 (__m128 __A)
13367 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13368 __A,
13369 _MM_FROUND_CUR_DIRECTION);
13372 extern __inline long long
13373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13374 _mm_cvttss_i64 (__m128 __A)
13376 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13377 _MM_FROUND_CUR_DIRECTION);
13379 #endif /* __x86_64__ */
13381 extern __inline unsigned
13382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13383 _mm_cvtss_u32 (__m128 __A)
13385 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13386 _MM_FROUND_CUR_DIRECTION);
13389 extern __inline unsigned
13390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13391 _mm_cvttss_u32 (__m128 __A)
13393 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13394 _MM_FROUND_CUR_DIRECTION);
13397 extern __inline int
13398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13399 _mm_cvttss_i32 (__m128 __A)
13401 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13402 _MM_FROUND_CUR_DIRECTION);
13405 #ifdef __x86_64__
13406 extern __inline unsigned long long
13407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13408 _mm_cvtsd_u64 (__m128d __A)
13410 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13411 __A,
13412 _MM_FROUND_CUR_DIRECTION);
13415 extern __inline unsigned long long
13416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13417 _mm_cvttsd_u64 (__m128d __A)
13419 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13420 __A,
13421 _MM_FROUND_CUR_DIRECTION);
13424 extern __inline long long
13425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13426 _mm_cvttsd_i64 (__m128d __A)
13428 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13429 _MM_FROUND_CUR_DIRECTION);
13431 #endif /* __x86_64__ */
13433 extern __inline unsigned
13434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13435 _mm_cvtsd_u32 (__m128d __A)
13437 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13438 _MM_FROUND_CUR_DIRECTION);
13441 extern __inline unsigned
13442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13443 _mm_cvttsd_u32 (__m128d __A)
13445 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13446 _MM_FROUND_CUR_DIRECTION);
13449 extern __inline int
13450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13451 _mm_cvttsd_i32 (__m128d __A)
13453 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13454 _MM_FROUND_CUR_DIRECTION);
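/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  As with the packed forms, the extra 't' selects
   truncation toward zero, while _mm_cvtsd_u32 honours the current
   rounding mode; both read only the low element of their argument.  */
static __inline unsigned
__example_double_to_u32 (__m128d __x)
{
  return _mm_cvttsd_u32 (__x);   /* unsigned truncation of the low element */
}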
13457 extern __inline __m512d
13458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13459 _mm512_cvtps_pd (__m256 __A)
13461 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13462 (__v8df)
13463 _mm512_undefined_pd (),
13464 (__mmask8) -1,
13465 _MM_FROUND_CUR_DIRECTION);
13468 extern __inline __m512d
13469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13470 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13472 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13473 (__v8df) __W,
13474 (__mmask8) __U,
13475 _MM_FROUND_CUR_DIRECTION);
13478 extern __inline __m512d
13479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13480 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13482 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13483 (__v8df)
13484 _mm512_setzero_pd (),
13485 (__mmask8) __U,
13486 _MM_FROUND_CUR_DIRECTION);
13489 extern __inline __m512
13490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13491 _mm512_cvtph_ps (__m256i __A)
13493 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13494 (__v16sf)
13495 _mm512_undefined_ps (),
13496 (__mmask16) -1,
13497 _MM_FROUND_CUR_DIRECTION);
13500 extern __inline __m512
13501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13502 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13504 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13505 (__v16sf) __W,
13506 (__mmask16) __U,
13507 _MM_FROUND_CUR_DIRECTION);
13510 extern __inline __m512
13511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13512 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13514 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13515 (__v16sf)
13516 _mm512_setzero_ps (),
13517 (__mmask16) __U,
13518 _MM_FROUND_CUR_DIRECTION);
13521 extern __inline __m256
13522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13523 _mm512_cvtpd_ps (__m512d __A)
13525 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13526 (__v8sf)
13527 _mm256_undefined_ps (),
13528 (__mmask8) -1,
13529 _MM_FROUND_CUR_DIRECTION);
13532 extern __inline __m256
13533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13534 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13536 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13537 (__v8sf) __W,
13538 (__mmask8) __U,
13539 _MM_FROUND_CUR_DIRECTION);
13542 extern __inline __m256
13543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13544 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13546 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13547 (__v8sf)
13548 _mm256_setzero_ps (),
13549 (__mmask8) __U,
13550 _MM_FROUND_CUR_DIRECTION);
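/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  Widening eight floats to doubles is exact, so a
   cvtps_pd/cvtpd_ps round trip returns the original finite values;
   _mm512_cvtph_ps performs the analogous widening from half precision.  */
static __inline __m256
__example_widen_narrow (__m256 __x)
{
  __m512d __wide = _mm512_cvtps_pd (__x);
  return _mm512_cvtpd_ps (__wide);
}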
13553 #ifdef __OPTIMIZE__
13554 extern __inline __m512
13555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13556 _mm512_getexp_ps (__m512 __A)
13558 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13559 (__v16sf)
13560 _mm512_undefined_ps (),
13561 (__mmask16) -1,
13562 _MM_FROUND_CUR_DIRECTION);
13565 extern __inline __m512
13566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13567 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13569 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13570 (__v16sf) __W,
13571 (__mmask16) __U,
13572 _MM_FROUND_CUR_DIRECTION);
13575 extern __inline __m512
13576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13577 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13579 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13580 (__v16sf)
13581 _mm512_setzero_ps (),
13582 (__mmask16) __U,
13583 _MM_FROUND_CUR_DIRECTION);
13586 extern __inline __m512d
13587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13588 _mm512_getexp_pd (__m512d __A)
13590 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13591 (__v8df)
13592 _mm512_undefined_pd (),
13593 (__mmask8) -1,
13594 _MM_FROUND_CUR_DIRECTION);
13597 extern __inline __m512d
13598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13599 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13601 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13602 (__v8df) __W,
13603 (__mmask8) __U,
13604 _MM_FROUND_CUR_DIRECTION);
13607 extern __inline __m512d
13608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13609 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13611 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13612 (__v8df)
13613 _mm512_setzero_pd (),
13614 (__mmask8) __U,
13615 _MM_FROUND_CUR_DIRECTION);
13618 extern __inline __m128
13619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13620 _mm_getexp_ss (__m128 __A, __m128 __B)
13622 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13623 (__v4sf) __B,
13624 _MM_FROUND_CUR_DIRECTION);
13627 extern __inline __m128
13628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13629 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13631 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13632 (__v4sf) __B,
13633 (__v4sf) __W,
13634 (__mmask8) __U,
13635 _MM_FROUND_CUR_DIRECTION);
13638 extern __inline __m128
13639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13640 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
13642 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13643 (__v4sf) __B,
13644 (__v4sf)
13645 _mm_setzero_ps (),
13646 (__mmask8) __U,
13647 _MM_FROUND_CUR_DIRECTION);
13650 extern __inline __m128d
13651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13652 _mm_getexp_sd (__m128d __A, __m128d __B)
13654 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13655 (__v2df) __B,
13656 _MM_FROUND_CUR_DIRECTION);
13659 extern __inline __m128d
13660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13661 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13663 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13664 (__v2df) __B,
13665 (__v2df) __W,
13666 (__mmask8) __U,
13667 _MM_FROUND_CUR_DIRECTION);
13670 extern __inline __m128d
13671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13672 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
13674 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13675 (__v2df) __B,
13676 (__v2df)
13677 _mm_setzero_pd (),
13678 (__mmask8) __U,
13679 _MM_FROUND_CUR_DIRECTION);
13682 extern __inline __m512d
13683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13684 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13685 _MM_MANTISSA_SIGN_ENUM __C)
13687 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13688 (__C << 2) | __B,
13689 _mm512_undefined_pd (),
13690 (__mmask8) -1,
13691 _MM_FROUND_CUR_DIRECTION);
13694 extern __inline __m512d
13695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13696 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13697 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13699 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13700 (__C << 2) | __B,
13701 (__v8df) __W, __U,
13702 _MM_FROUND_CUR_DIRECTION);
13705 extern __inline __m512d
13706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13707 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13708 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13710 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13711 (__C << 2) | __B,
13712 (__v8df)
13713 _mm512_setzero_pd (),
13714 __U,
13715 _MM_FROUND_CUR_DIRECTION);
13718 extern __inline __m512
13719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13720 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13721 _MM_MANTISSA_SIGN_ENUM __C)
13723 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13724 (__C << 2) | __B,
13725 _mm512_undefined_ps (),
13726 (__mmask16) -1,
13727 _MM_FROUND_CUR_DIRECTION);
13730 extern __inline __m512
13731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13732 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13733 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13735 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13736 (__C << 2) | __B,
13737 (__v16sf) __W, __U,
13738 _MM_FROUND_CUR_DIRECTION);
13741 extern __inline __m512
13742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13743 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13744 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13746 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13747 (__C << 2) | __B,
13748 (__v16sf)
13749 _mm512_setzero_ps (),
13750 __U,
13751 _MM_FROUND_CUR_DIRECTION);
13754 extern __inline __m128d
13755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13756 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13757 _MM_MANTISSA_SIGN_ENUM __D)
13759 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13760 (__v2df) __B,
13761 (__D << 2) | __C,
13762 _MM_FROUND_CUR_DIRECTION);
13765 extern __inline __m128d
13766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13767 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
13768 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13770 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13771 (__v2df) __B,
13772 (__D << 2) | __C,
13773 (__v2df) __W,
13774 __U,
13775 _MM_FROUND_CUR_DIRECTION);
13778 extern __inline __m128d
13779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13780 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
13781 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13783 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13784 (__v2df) __B,
13785 (__D << 2) | __C,
13786 (__v2df)
13787 _mm_setzero_pd(),
13788 __U,
13789 _MM_FROUND_CUR_DIRECTION);
13792 extern __inline __m128
13793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13794 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13795 _MM_MANTISSA_SIGN_ENUM __D)
13797 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13798 (__v4sf) __B,
13799 (__D << 2) | __C,
13800 _MM_FROUND_CUR_DIRECTION);
13803 extern __inline __m128
13804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13805 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
13806 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13808 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13809 (__v4sf) __B,
13810 (__D << 2) | __C,
13811 (__v4sf) __W,
13812 __U,
13813 _MM_FROUND_CUR_DIRECTION);
13816 extern __inline __m128
13817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13818 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
13819 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13821 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13822 (__v4sf) __B,
13823 (__D << 2) | __C,
13824 (__v4sf)
13825 _mm_setzero_ps(),
13826 __U,
13827 _MM_FROUND_CUR_DIRECTION);
13830 #else
13831 #define _mm512_getmant_pd(X, B, C) \
13832 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13833 (int)(((C)<<2) | (B)), \
13834 (__v8df)_mm512_undefined_pd(), \
13835 (__mmask8)-1,\
13836 _MM_FROUND_CUR_DIRECTION))
13838 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
13839 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13840 (int)(((C)<<2) | (B)), \
13841 (__v8df)(__m512d)(W), \
13842 (__mmask8)(U),\
13843 _MM_FROUND_CUR_DIRECTION))
13845 #define _mm512_maskz_getmant_pd(U, X, B, C) \
13846 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13847 (int)(((C)<<2) | (B)), \
13848 (__v8df)_mm512_setzero_pd(), \
13849 (__mmask8)(U),\
13850 _MM_FROUND_CUR_DIRECTION))
13851 #define _mm512_getmant_ps(X, B, C) \
13852 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13853 (int)(((C)<<2) | (B)), \
13854 (__v16sf)_mm512_undefined_ps(), \
13855 (__mmask16)-1,\
13856 _MM_FROUND_CUR_DIRECTION))
13858 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
13859 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13860 (int)(((C)<<2) | (B)), \
13861 (__v16sf)(__m512)(W), \
13862 (__mmask16)(U),\
13863 _MM_FROUND_CUR_DIRECTION))
13865 #define _mm512_maskz_getmant_ps(U, X, B, C) \
13866 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13867 (int)(((C)<<2) | (B)), \
13868 (__v16sf)_mm512_setzero_ps(), \
13869 (__mmask16)(U),\
13870 _MM_FROUND_CUR_DIRECTION))
13871 #define _mm_getmant_sd(X, Y, C, D) \
13872 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
13873 (__v2df)(__m128d)(Y), \
13874 (int)(((D)<<2) | (C)), \
13875 _MM_FROUND_CUR_DIRECTION))
13877 #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
13878 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
13879 (__v2df)(__m128d)(Y), \
13880 (int)(((D)<<2) | (C)), \
13881 (__v2df)(__m128d)(W), \
13882 (__mmask8)(U),\
13883 _MM_FROUND_CUR_DIRECTION))
13885 #define _mm_maskz_getmant_sd(U, X, Y, C, D) \
13886 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
13887 (__v2df)(__m128d)(Y), \
13888 (int)(((D)<<2) | (C)), \
13889 (__v2df)_mm_setzero_pd(), \
13890 (__mmask8)(U),\
13891 _MM_FROUND_CUR_DIRECTION))
13893 #define _mm_getmant_ss(X, Y, C, D) \
13894 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
13895 (__v4sf)(__m128)(Y), \
13896 (int)(((D)<<2) | (C)), \
13897 _MM_FROUND_CUR_DIRECTION))
13899 #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
13900 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
13901 (__v4sf)(__m128)(Y), \
13902 (int)(((D)<<2) | (C)), \
13903 (__v4sf)(__m128)(W), \
13904 (__mmask8)(U),\
13905 _MM_FROUND_CUR_DIRECTION))
13907 #define _mm_maskz_getmant_ss(U, X, Y, C, D) \
13908 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
13909 (__v4sf)(__m128)(Y), \
13910 (int)(((D)<<2) | (C)), \
13911 (__v4sf)_mm_setzero_ps(), \
13912 (__mmask8)(U),\
13913 _MM_FROUND_CUR_DIRECTION))
13915 #define _mm_getexp_ss(A, B) \
13916 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
13917 _MM_FROUND_CUR_DIRECTION))
13919 #define _mm_mask_getexp_ss(W, U, A, B) \
13920 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
13921 _MM_FROUND_CUR_DIRECTION)
13923 #define _mm_maskz_getexp_ss(U, A, B) \
13924 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
13925 _MM_FROUND_CUR_DIRECTION)
13927 #define _mm_getexp_sd(A, B) \
13928 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
13929 _MM_FROUND_CUR_DIRECTION))
13931 #define _mm_mask_getexp_sd(W, U, A, B) \
13932 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
13933 _MM_FROUND_CUR_DIRECTION)
13935 #define _mm_maskz_getexp_sd(U, A, B) \
13936 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
13937 _MM_FROUND_CUR_DIRECTION)
13939 #define _mm512_getexp_ps(A) \
13940 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13941 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
13943 #define _mm512_mask_getexp_ps(W, U, A) \
13944 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13945 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13947 #define _mm512_maskz_getexp_ps(U, A) \
13948 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13949 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13951 #define _mm512_getexp_pd(A) \
13952 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13953 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
13955 #define _mm512_mask_getexp_pd(W, U, A) \
13956 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13957 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13959 #define _mm512_maskz_getexp_pd(U, A) \
13960 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13961 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13962 #endif
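/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  Together getexp and getmant behave like a vector frexp:
   for a normal, non-zero __x,  __x == m * 2^e  with the mantissa m
   normalised into [1, 2) and the exponent e returned as a double.  */
static __inline __m512d
__example_frexp_like (__m512d __x, __m512d *__e)
{
  *__e = _mm512_getexp_pd (__x);               /* e = floor (log2 |__x|) */
  return _mm512_getmant_pd (__x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}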
13964 #ifdef __OPTIMIZE__
13965 extern __inline __m512
13966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13967 _mm512_roundscale_ps (__m512 __A, const int __imm)
13969 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
13970 (__v16sf)
13971 _mm512_undefined_ps (),
13972 -1,
13973 _MM_FROUND_CUR_DIRECTION);
13976 extern __inline __m512
13977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13978 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
13979 const int __imm)
13981 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
13982 (__v16sf) __A,
13983 (__mmask16) __B,
13984 _MM_FROUND_CUR_DIRECTION);
13987 extern __inline __m512
13988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13989 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
13991 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
13992 __imm,
13993 (__v16sf)
13994 _mm512_setzero_ps (),
13995 (__mmask16) __A,
13996 _MM_FROUND_CUR_DIRECTION);
13999 extern __inline __m512d
14000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14001 _mm512_roundscale_pd (__m512d __A, const int __imm)
14003 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
14004 (__v8df)
14005 _mm512_undefined_pd (),
14006 -1,
14007 _MM_FROUND_CUR_DIRECTION);
14010 extern __inline __m512d
14011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14012 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
14013 const int __imm)
14015 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
14016 (__v8df) __A,
14017 (__mmask8) __B,
14018 _MM_FROUND_CUR_DIRECTION);
14021 extern __inline __m512d
14022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14023 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
14025 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
14026 __imm,
14027 (__v8df)
14028 _mm512_setzero_pd (),
14029 (__mmask8) __A,
14030 _MM_FROUND_CUR_DIRECTION);
14033 extern __inline __m128
14034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14035 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
14037 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
14038 (__v4sf) __B, __imm,
14039 _MM_FROUND_CUR_DIRECTION);
14042 extern __inline __m128d
14043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14044 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
14046 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
14047 (__v2df) __B, __imm,
14048 _MM_FROUND_CUR_DIRECTION);
14051 #else
14052 #define _mm512_roundscale_ps(A, B) \
14053 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
14054 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
14055 #define _mm512_mask_roundscale_ps(A, B, C, D) \
14056 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
14057 (int)(D), \
14058 (__v16sf)(__m512)(A), \
14059 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
14060 #define _mm512_maskz_roundscale_ps(A, B, C) \
14061 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
14062 (int)(C), \
14063 (__v16sf)_mm512_setzero_ps(),\
14064 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
14065 #define _mm512_roundscale_pd(A, B) \
14066 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
14067 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
14068 #define _mm512_mask_roundscale_pd(A, B, C, D) \
14069 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
14070 (int)(D), \
14071 (__v8df)(__m512d)(A), \
14072 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
14073 #define _mm512_maskz_roundscale_pd(A, B, C) \
14074 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
14075 (int)(C), \
14076 (__v8df)_mm512_setzero_pd(),\
14077 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
14078 #define _mm_roundscale_ss(A, B, C) \
14079 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
14080 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
14081 #define _mm_roundscale_sd(A, B, C) \
14082 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
14083 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
14084 #endif
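/* Illustrative sketch -- not part of the original header; names are
   hypothetical and the comment assumes the usual VRNDSCALE immediate
   layout: bits 7:4 give the number of binary fraction digits to keep and
   bits 1:0 the rounding mode (00 = nearest even).  */
static __inline __m512
__example_round_to_quarters (__m512 __x)
{
  /* 0x20: keep 2 fraction bits, i.e. round to the nearest multiple of 0.25. */
  return _mm512_roundscale_ps (__x, 0x20);
}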
14086 #ifdef __OPTIMIZE__
14087 extern __inline __mmask8
14088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14089 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
14091 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14092 (__v8df) __Y, __P,
14093 (__mmask8) -1,
14094 _MM_FROUND_CUR_DIRECTION);
14097 extern __inline __mmask16
14098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14099 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
14101 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14102 (__v16sf) __Y, __P,
14103 (__mmask16) -1,
14104 _MM_FROUND_CUR_DIRECTION);
14107 extern __inline __mmask16
14108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14109 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
14111 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
14112 (__v16sf) __Y, __P,
14113 (__mmask16) __U,
14114 _MM_FROUND_CUR_DIRECTION);
14117 extern __inline __mmask8
14118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14119 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
14121 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14122 (__v8df) __Y, __P,
14123 (__mmask8) __U,
14124 _MM_FROUND_CUR_DIRECTION);
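/* Illustrative sketch -- not part of the original header; names are
   hypothetical.  Each bit of the returned mask corresponds to one lane,
   so the number of lanes satisfying the predicate is simply the
   population count of the mask.  */
static __inline int
__example_count_less_than (__m512d __x, __m512d __y)
{
  __mmask8 __k = _mm512_cmp_pd_mask (__x, __y, _CMP_LT_OS);  /* ordered < */
  return __builtin_popcount ((unsigned) __k);
}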
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_EQ_OQ, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_EQ_OQ, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_LT_OS, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_LT_OS, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_LE_OS, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_LE_OS, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_UNORD_Q, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_UNORD_Q, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_NEQ_UQ, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_NEQ_UQ, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_NLT_US, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_NLT_US, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_NLE_US, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_NLE_US, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_ORD_Q, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, (__v8df) __Y,
      _CMP_ORD_Q, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
}
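/* The same fixed-predicate comparisons for packed single-precision
   elements, producing a 16-bit mask.  */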
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_EQ_OQ, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_EQ_OQ, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_LT_OS, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_LT_OS, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_LE_OS, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_LE_OS, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_UNORD_Q, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_UNORD_Q, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_NEQ_UQ, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_NEQ_UQ, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_NLT_US, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_NLT_US, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_NLE_US, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_NLE_US, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_ORD_Q, (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, (__v16sf) __Y,
      _CMP_ORD_Q, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
}
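/* Scalar compares with an arbitrary predicate __P: only the low
   double/float element of __X and __Y is compared, so at most bit 0 of
   the result mask can be set.  */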
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, (__v2df) __Y,
      __P, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, (__v2df) __Y,
      __P, (__mmask8) __M, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, (__v4sf) __Y,
      __P, (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, (__v4sf) __Y,
      __P, (__mmask8) __M, _MM_FROUND_CUR_DIRECTION);
}
#else
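/* When __OPTIMIZE__ is not defined, the functions above cannot pass their
   predicate argument down as the immediate the builtins require, so the
   variable-predicate compares are provided as macros instead.  */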
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (int)(P), \
      (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), (int)(P), \
      (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), (int)(P), \
      (__mmask16)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (int)(P), \
      (M), _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (int)(P), \
      (M), _MM_FROUND_CUR_DIRECTION))
#endif
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kmov (__mmask16 __A)
{
  return __builtin_ia32_kmovw (__A);
}
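/* Casts between the 512-bit vector types.  These only reinterpret the
   bits; no instructions are generated.  */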
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}
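/* Casts between 512-bit and narrower vectors.  The narrowing casts return
   the low 128 or 256 bits; for the widening casts the upper bits of the
   result are undefined.  */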
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd512_pd128 (__m512d __A)
{
  return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps512_ps128 (__m512 __A)
{
  return _mm512_extractf32x4_ps(__A, 0);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd512_pd256 (__m512d __A)
{
  return _mm512_extractf64x4_pd(__A, 0);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps512_ps256 (__m512 __A)
{
  return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd128_pd512 (__m128d __A)
{
  return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps128_ps512 (__m128 __A)
{
  return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi128_si512 (__m128i __A)
{
  return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd256_pd512 (__m256d __A)
{
  return __builtin_ia32_pd512_256pd (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps256_ps512 (__m256 __A)
{
  return __builtin_ia32_ps512_256ps (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi256_si512 (__m256i __A)
{
  return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
}
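/* Unsigned 32-bit and 64-bit integer comparisons.  The third argument of
   the ucmp builtins selects the predicate: 0 is equal, 6 is greater-than.  */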
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
      (__v16si) __B, 0, (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
      (__v16si) __B, 0, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
      (__v8di) __B, 0, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
      (__v8di) __B, 0, (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
      (__v16si) __B, 6, (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
      (__v16si) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
      (__v8di) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
      (__v8di) __B, 6, (__mmask8) -1);
}
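/* Horizontal reductions.  __MM512_REDUCE_OP is redefined for each element
   type below; it repeatedly splits the vector in half and combines the two
   halves with the given operation until a single element remains.  */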
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
  __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
  __v4si __T6 = __T4 op __T5; \
  __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}
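/* The masked reductions first replace the masked-out elements with the
   identity of the operation (0 for add and or, 1 for mul, all-ones for
   and) and then reduce as usual.  */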
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (|);
}
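/* min/max have no infix operator, so this variant of the reduction macro
   combines the halves with the corresponding 256-bit and 128-bit
   intrinsics instead.  */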
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
  __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = _mm256_##op (__T1, __T2); \
  __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
  __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
  __m128i __T6 = _mm_##op (__T4, __T5); \
  __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
      (__v4si) { 2, 3, 0, 1 }); \
  __m128i __T8 = _mm_##op (__T6, __T7); \
  __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
      (__v4si) { 1, 0, 1, 0 }); \
  __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
  return __T10[0]
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (max_epu32);
}
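/* Single-precision reductions follow the same halving scheme, using the
   256-bit and 128-bit float types.  */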
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __T3 = __T1 op __T2; \
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
  __m128 __T6 = __T4 op __T5; \
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __T8 = __T6 op __T7; \
  return __T8[0] op __T8[1]
extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_ps (__m512 __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_ps (__m512 __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_maskz_mov_ps (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
  __MM512_REDUCE_OP (*);
}
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __T3 = _mm256_##op (__T1, __T2); \
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
  __m128 __T6 = _mm_##op (__T4, __T5); \
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __T8 = _mm_##op (__T6, __T7); \
  __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
  __m128 __T10 = _mm_##op (__T8, __T9); \
  return __T10[0]
extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_ps (__m512 __A)
{
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_ps (__m512 __A)
{
  __MM512_REDUCE_OP (max_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (max_ps);
}
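/* 64-bit integer reductions.  For min and max the reduction stays within
   the 512-bit register, combining lanes with _mm512_shuffle_i64x2 and
   __builtin_shuffle.  */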
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
  __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __T3 = (__m256i) (__T1 op __T2); \
  __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
  __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
  __v2di __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (|);
}
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
  __m512i __T2 = _mm512_##op (__A, __T1); \
  __m512i __T3 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
      (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m512i __T4 = _mm512_##op (__T2, __T3); \
  __m512i __T5 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
      (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 }); \
  __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
  return __T6[0]
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
      __U, __A);
  __MM512_REDUCE_OP (min_epi64);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
      __U, __A);
  __MM512_REDUCE_OP (max_epi64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu64 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu64 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (min_epu64);
}

extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (max_epu64);
}
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __T3 = __T1 op __T2; \
  __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
  __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
  __m128d __T6 = __T4 op __T5; \
  return __T6[0] op __T6[1]
extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_pd (__m512d __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_pd (__m512d __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_maskz_mov_pd (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
  __MM512_REDUCE_OP (*);
}
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __T3 = _mm256_##op (__T1, __T2); \
  __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
  __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
  __m128d __T6 = _mm_##op (__T4, __T5); \
  __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
  __m128d __T8 = _mm_##op (__T6, __T7); \
  return __T8[0]
extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_pd (__m512d __A)
{
  __MM512_REDUCE_OP (min_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_pd (__m512d __A)
{
  __MM512_REDUCE_OP (max_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
  __MM512_REDUCE_OP (min_pd);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
{
  __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
  __MM512_REDUCE_OP (max_pd);
}
#undef __MM512_REDUCE_OP

#ifdef __DISABLE_AVX512F__
#undef __DISABLE_AVX512F__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512F__ */

#endif /* _AVX512FINTRIN_H_INCLUDED */