* x86-tune-sched.c (ix86_adjust_cost): Fix Zen support.
[official-gcc.git] / gcc / config / i386 / avx512fintrin.h
blob72f57f7b6c930a5582b5e3835a09603bd085b3d1
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same types.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

/* Opmask types: one bit per vector lane.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
63 extern __inline __mmask16
64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
65 _mm512_int2mask (int __M)
67 return (__mmask16) __M;
70 extern __inline int
71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
72 _mm512_mask2int (__mmask16 __M)
74 return (int) __M;
77 extern __inline __m512i
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm512_set_epi64 (long long __A, long long __B, long long __C,
80 long long __D, long long __E, long long __F,
81 long long __G, long long __H)
83 return __extension__ (__m512i) (__v8di)
84 { __H, __G, __F, __E, __D, __C, __B, __A };
87 /* Create the vector [A B C D E F G H I J K L M N O P]. */
88 extern __inline __m512i
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
91 int __E, int __F, int __G, int __H,
92 int __I, int __J, int __K, int __L,
93 int __M, int __N, int __O, int __P)
95 return __extension__ (__m512i)(__v16si)
96 { __P, __O, __N, __M, __L, __K, __J, __I,
97 __H, __G, __F, __E, __D, __C, __B, __A };
100 extern __inline __m512d
101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
102 _mm512_set_pd (double __A, double __B, double __C, double __D,
103 double __E, double __F, double __G, double __H)
105 return __extension__ (__m512d)
106 { __H, __G, __F, __E, __D, __C, __B, __A };
109 extern __inline __m512
110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
111 _mm512_set_ps (float __A, float __B, float __C, float __D,
112 float __E, float __F, float __G, float __H,
113 float __I, float __J, float __K, float __L,
114 float __M, float __N, float __O, float __P)
116 return __extension__ (__m512)
117 { __P, __O, __N, __M, __L, __K, __J, __I,
118 __H, __G, __F, __E, __D, __C, __B, __A };
/* "setr" (set-reversed) variants: element 0 comes first in the argument
   list, so they simply forward to the "set" forms with arguments flipped.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			      \
			  e8,e9,e10,e11,e12,e13,e14,e15)		      \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm512_undefined_ps (void)
138 __m512 __Y = __Y;
139 return __Y;
142 #define _mm512_undefined _mm512_undefined_ps
144 extern __inline __m512d
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm512_undefined_pd (void)
148 __m512d __Y = __Y;
149 return __Y;
152 extern __inline __m512i
153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154 _mm512_undefined_epi32 (void)
156 __m512i __Y = __Y;
157 return __Y;
160 #define _mm512_undefined_si512 _mm512_undefined_epi32
162 extern __inline __m512i
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm512_set1_epi8 (char __A)
166 return __extension__ (__m512i)(__v64qi)
167 { __A, __A, __A, __A, __A, __A, __A, __A,
168 __A, __A, __A, __A, __A, __A, __A, __A,
169 __A, __A, __A, __A, __A, __A, __A, __A,
170 __A, __A, __A, __A, __A, __A, __A, __A,
171 __A, __A, __A, __A, __A, __A, __A, __A,
172 __A, __A, __A, __A, __A, __A, __A, __A,
173 __A, __A, __A, __A, __A, __A, __A, __A,
174 __A, __A, __A, __A, __A, __A, __A, __A };
177 extern __inline __m512i
178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179 _mm512_set1_epi16 (short __A)
181 return __extension__ (__m512i)(__v32hi)
182 { __A, __A, __A, __A, __A, __A, __A, __A,
183 __A, __A, __A, __A, __A, __A, __A, __A,
184 __A, __A, __A, __A, __A, __A, __A, __A,
185 __A, __A, __A, __A, __A, __A, __A, __A };
188 extern __inline __m512d
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set1_pd (double __A)
192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
193 (__v2df) { __A, },
194 (__v8df)
195 _mm512_undefined_pd (),
196 (__mmask8) -1);
199 extern __inline __m512
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_set1_ps (float __A)
203 return (__m512) __builtin_ia32_broadcastss512 (__extension__
204 (__v4sf) { __A, },
205 (__v16sf)
206 _mm512_undefined_ps (),
207 (__mmask16) -1);
210 /* Create the vector [A B C D A B C D A B C D A B C D]. */
211 extern __inline __m512i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
215 return __extension__ (__m512i)(__v16si)
216 { __D, __C, __B, __A, __D, __C, __B, __A,
217 __D, __C, __B, __A, __D, __C, __B, __A };
220 extern __inline __m512i
221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
222 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
223 long long __D)
225 return __extension__ (__m512i) (__v8di)
226 { __D, __C, __B, __A, __D, __C, __B, __A };
229 extern __inline __m512d
230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231 _mm512_set4_pd (double __A, double __B, double __C, double __D)
233 return __extension__ (__m512d)
234 { __D, __C, __B, __A, __D, __C, __B, __A };
237 extern __inline __m512
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm512_set4_ps (float __A, float __B, float __C, float __D)
241 return __extension__ (__m512)
242 { __D, __C, __B, __A, __D, __C, __B, __A,
243 __D, __C, __B, __A, __D, __C, __B, __A };
246 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
247 _mm512_set4_epi64(e3,e2,e1,e0)
249 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
250 _mm512_set4_epi32(e3,e2,e1,e0)
252 #define _mm512_setr4_pd(e0,e1,e2,e3) \
253 _mm512_set4_pd(e3,e2,e1,e0)
255 #define _mm512_setr4_ps(e0,e1,e2,e3) \
256 _mm512_set4_ps(e3,e2,e1,e0)
258 extern __inline __m512
259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
260 _mm512_setzero_ps (void)
262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
266 extern __inline __m512d
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm512_setzero_pd (void)
270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
273 extern __inline __m512i
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_setzero_epi32 (void)
277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
280 extern __inline __m512i
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm512_setzero_si512 (void)
284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
287 extern __inline __m512d
288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
292 (__v8df) __W,
293 (__mmask8) __U);
296 extern __inline __m512d
297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
298 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
301 (__v8df)
302 _mm512_setzero_pd (),
303 (__mmask8) __U);
306 extern __inline __m512
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
311 (__v16sf) __W,
312 (__mmask16) __U);
315 extern __inline __m512
316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
317 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
320 (__v16sf)
321 _mm512_setzero_ps (),
322 (__mmask16) __U);
325 extern __inline __m512d
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm512_load_pd (void const *__P)
329 return *(__m512d *) __P;
332 extern __inline __m512d
333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
334 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
337 (__v8df) __W,
338 (__mmask8) __U);
341 extern __inline __m512d
342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
343 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
346 (__v8df)
347 _mm512_setzero_pd (),
348 (__mmask8) __U);
351 extern __inline void
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353 _mm512_store_pd (void *__P, __m512d __A)
355 *(__m512d *) __P = __A;
358 extern __inline void
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
363 (__mmask8) __U);
366 extern __inline __m512
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm512_load_ps (void const *__P)
370 return *(__m512 *) __P;
373 extern __inline __m512
374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
375 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
378 (__v16sf) __W,
379 (__mmask16) __U);
382 extern __inline __m512
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
387 (__v16sf)
388 _mm512_setzero_ps (),
389 (__mmask16) __U);
392 extern __inline void
393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
394 _mm512_store_ps (void *__P, __m512 __A)
396 *(__m512 *) __P = __A;
399 extern __inline void
400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
404 (__mmask16) __U);
407 extern __inline __m512i
408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
409 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
412 (__v8di) __W,
413 (__mmask8) __U);
416 extern __inline __m512i
417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
418 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
421 (__v8di)
422 _mm512_setzero_si512 (),
423 (__mmask8) __U);
426 extern __inline __m512i
427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
428 _mm512_load_epi64 (void const *__P)
430 return *(__m512i *) __P;
433 extern __inline __m512i
434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
438 (__v8di) __W,
439 (__mmask8) __U);
442 extern __inline __m512i
443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
444 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
447 (__v8di)
448 _mm512_setzero_si512 (),
449 (__mmask8) __U);
452 extern __inline void
453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454 _mm512_store_epi64 (void *__P, __m512i __A)
456 *(__m512i *) __P = __A;
459 extern __inline void
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
464 (__mmask8) __U);
467 extern __inline __m512i
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
472 (__v16si) __W,
473 (__mmask16) __U);
476 extern __inline __m512i
477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
481 (__v16si)
482 _mm512_setzero_si512 (),
483 (__mmask16) __U);
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_load_si512 (void const *__P)
490 return *(__m512i *) __P;
493 extern __inline __m512i
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm512_load_epi32 (void const *__P)
497 return *(__m512i *) __P;
500 extern __inline __m512i
501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
505 (__v16si) __W,
506 (__mmask16) __U);
509 extern __inline __m512i
510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
514 (__v16si)
515 _mm512_setzero_si512 (),
516 (__mmask16) __U);
519 extern __inline void
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm512_store_si512 (void *__P, __m512i __A)
523 *(__m512i *) __P = __A;
526 extern __inline void
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm512_store_epi32 (void *__P, __m512i __A)
530 *(__m512i *) __P = __A;
533 extern __inline void
534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
535 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
538 (__mmask16) __U);
541 extern __inline __m512i
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
545 return (__m512i) ((__v16su) __A * (__v16su) __B);
548 extern __inline __m512i
549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
550 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
553 (__v16si) __B,
554 (__v16si)
555 _mm512_setzero_si512 (),
556 __M);
559 extern __inline __m512i
560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
561 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
564 (__v16si) __B,
565 (__v16si) __W, __M);
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573 (__v16si) __Y,
574 (__v16si)
575 _mm512_undefined_epi32 (),
576 (__mmask16) -1);
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
584 (__v16si) __Y,
585 (__v16si) __W,
586 (__mmask16) __U);
589 extern __inline __m512i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
594 (__v16si) __Y,
595 (__v16si)
596 _mm512_setzero_si512 (),
597 (__mmask16) __U);
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605 (__v16si) __Y,
606 (__v16si)
607 _mm512_undefined_epi32 (),
608 (__mmask16) -1);
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
616 (__v16si) __Y,
617 (__v16si) __W,
618 (__mmask16) __U);
621 extern __inline __m512i
622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
623 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
626 (__v16si) __Y,
627 (__v16si)
628 _mm512_setzero_si512 (),
629 (__mmask16) __U);
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637 (__v16si) __Y,
638 (__v16si)
639 _mm512_undefined_epi32 (),
640 (__mmask16) -1);
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
648 (__v16si) __Y,
649 (__v16si) __W,
650 (__mmask16) __U);
653 extern __inline __m512i
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
658 (__v16si) __Y,
659 (__v16si)
660 _mm512_setzero_si512 (),
661 (__mmask16) __U);
664 extern __inline __m512i
665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666 _mm512_add_epi64 (__m512i __A, __m512i __B)
668 return (__m512i) ((__v8du) __A + (__v8du) __B);
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
676 (__v8di) __B,
677 (__v8di) __W,
678 (__mmask8) __U);
681 extern __inline __m512i
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
686 (__v8di) __B,
687 (__v8di)
688 _mm512_setzero_si512 (),
689 (__mmask8) __U);
692 extern __inline __m512i
693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694 _mm512_sub_epi64 (__m512i __A, __m512i __B)
696 return (__m512i) ((__v8du) __A - (__v8du) __B);
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
704 (__v8di) __B,
705 (__v8di) __W,
706 (__mmask8) __U);
709 extern __inline __m512i
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
714 (__v8di) __B,
715 (__v8di)
716 _mm512_setzero_si512 (),
717 (__mmask8) __U);
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725 (__v8di) __Y,
726 (__v8di)
727 _mm512_undefined_pd (),
728 (__mmask8) -1);
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
736 (__v8di) __Y,
737 (__v8di) __W,
738 (__mmask8) __U);
741 extern __inline __m512i
742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
746 (__v8di) __Y,
747 (__v8di)
748 _mm512_setzero_si512 (),
749 (__mmask8) __U);
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757 (__v8di) __Y,
758 (__v8di)
759 _mm512_undefined_epi32 (),
760 (__mmask8) -1);
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
768 (__v8di) __Y,
769 (__v8di) __W,
770 (__mmask8) __U);
773 extern __inline __m512i
774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
778 (__v8di) __Y,
779 (__v8di)
780 _mm512_setzero_si512 (),
781 (__mmask8) __U);
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789 (__v8di) __Y,
790 (__v8di)
791 _mm512_undefined_epi32 (),
792 (__mmask8) -1);
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
800 (__v8di) __Y,
801 (__v8di) __W,
802 (__mmask8) __U);
805 extern __inline __m512i
806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
810 (__v8di) __Y,
811 (__v8di)
812 _mm512_setzero_si512 (),
813 (__mmask8) __U);
816 extern __inline __m512i
817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818 _mm512_add_epi32 (__m512i __A, __m512i __B)
820 return (__m512i) ((__v16su) __A + (__v16su) __B);
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
828 (__v16si) __B,
829 (__v16si) __W,
830 (__mmask16) __U);
833 extern __inline __m512i
834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
838 (__v16si) __B,
839 (__v16si)
840 _mm512_setzero_si512 (),
841 (__mmask16) __U);
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di)
851 _mm512_undefined_epi32 (),
852 (__mmask8) -1);
855 extern __inline __m512i
856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
860 (__v16si) __Y,
861 (__v8di) __W, __M);
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
869 (__v16si) __Y,
870 (__v8di)
871 _mm512_setzero_si512 (),
872 __M);
875 extern __inline __m512i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm512_sub_epi32 (__m512i __A, __m512i __B)
879 return (__m512i) ((__v16su) __A - (__v16su) __B);
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
887 (__v16si) __B,
888 (__v16si) __W,
889 (__mmask16) __U);
892 extern __inline __m512i
893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
894 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
897 (__v16si) __B,
898 (__v16si)
899 _mm512_setzero_si512 (),
900 (__mmask16) __U);
903 extern __inline __m512i
904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
905 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
908 (__v16si) __Y,
909 (__v8di)
910 _mm512_undefined_epi32 (),
911 (__mmask8) -1);
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
919 (__v16si) __Y,
920 (__v8di) __W, __M);
923 extern __inline __m512i
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
928 (__v16si) __Y,
929 (__v8di)
930 _mm512_setzero_si512 (),
931 __M);
/* Immediate-count 64-bit left shift.  The builtin requires a constant
   count, so without __OPTIMIZE__ (where inlining may not happen) macro
   forms are used instead, keeping the count a literal constant.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C)						      \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),				      \
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)				      \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),   \
    (__v8di)(__m512i)(W),						      \
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)				      \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),				      \
    (__mmask8)(U)))
#endif
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_sll_epi64 (__m512i __A, __m128i __B)
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986 (__v2di) __B,
987 (__v8di)
988 _mm512_undefined_epi32 (),
989 (__mmask8) -1);
992 extern __inline __m512i
993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
997 (__v2di) __B,
998 (__v8di) __W,
999 (__mmask8) __U);
1002 extern __inline __m512i
1003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1004 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1007 (__v2di) __B,
1008 (__v8di)
1009 _mm512_setzero_si512 (),
1010 (__mmask8) __U);
/* Immediate-count 64-bit logical right shift; macro fallback without
   __OPTIMIZE__ because the builtin needs a constant count.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C)						      \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),				      \
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)				      \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),   \
    (__v8di)(__m512i)(W),						      \
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)				      \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),				      \
    (__mmask8)(U)))
#endif
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065 (__v2di) __B,
1066 (__v8di)
1067 _mm512_undefined_epi32 (),
1068 (__mmask8) -1);
1071 extern __inline __m512i
1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1076 (__v2di) __B,
1077 (__v8di) __W,
1078 (__mmask8) __U);
1081 extern __inline __m512i
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1086 (__v2di) __B,
1087 (__v8di)
1088 _mm512_setzero_si512 (),
1089 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Arithmetic right shift of 64-bit elements by immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C)						\
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C)				\
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144 (__v2di) __B,
1145 (__v8di)
1146 _mm512_undefined_epi32 (),
1147 (__mmask8) -1);
1150 extern __inline __m512i
1151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1152 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1155 (__v2di) __B,
1156 (__v8di) __W,
1157 (__mmask8) __U);
1160 extern __inline __m512i
1161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1162 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1165 (__v2di) __B,
1166 (__v8di)
1167 _mm512_setzero_si512 (),
1168 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Left shift of 32-bit elements by immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
#define _mm512_slli_epi32(X, C)						\
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C)				\
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C)				\
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223 (__v4si) __B,
1224 (__v16si)
1225 _mm512_undefined_epi32 (),
1226 (__mmask16) -1);
1229 extern __inline __m512i
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1234 (__v4si) __B,
1235 (__v16si) __W,
1236 (__mmask16) __U);
1239 extern __inline __m512i
1240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1244 (__v4si) __B,
1245 (__v16si)
1246 _mm512_setzero_si512 (),
1247 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Logical right shift of 32-bit elements by immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
#define _mm512_srli_epi32(X, C)						\
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C)				\
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302 (__v4si) __B,
1303 (__v16si)
1304 _mm512_undefined_epi32 (),
1305 (__mmask16) -1);
1308 extern __inline __m512i
1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1310 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1313 (__v4si) __B,
1314 (__v16si) __W,
1315 (__mmask16) __U);
1318 extern __inline __m512i
1319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1323 (__v4si) __B,
1324 (__v16si)
1325 _mm512_setzero_si512 (),
1326 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Arithmetic right shift of 32-bit elements by immediate count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
#define _mm512_srai_epi32(X, C)						\
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C)				\
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C)				\
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381 (__v4si) __B,
1382 (__v16si)
1383 _mm512_undefined_epi32 (),
1384 (__mmask16) -1);
1387 extern __inline __m512i
1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1389 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1392 (__v4si) __B,
1393 (__v16si) __W,
1394 (__mmask16) __U);
1397 extern __inline __m512i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1402 (__v4si) __B,
1403 (__v16si)
1404 _mm512_setzero_si512 (),
1405 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Scalar add/sub of the low double/float element with an explicit
   rounding mode __R; upper elements are taken from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}
#else
#define _mm_add_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_mask_add_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_add_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_mask_add_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_sub_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_sub_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_subss_round(A, B, C)

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
#ifdef __OPTIMIZE__
/* Bitwise ternary logic: every result bit is produced by looking up the
   corresponding bits of __A/__B/__C in the 8-bit truth table __imm.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
						      (__v8di) __B,
						      (__v8di) __C,
						      __imm, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
						      (__v16si) __B,
						      (__v16si) __C,
						      __imm, (__mmask16) __U);
}
#else
#define _mm512_ternarylogic_epi64(A, B, C, I)				\
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A),	\
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)				\
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)			\
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A),	\
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
    (__mmask16)(U)))
#endif
1666 extern __inline __m512d
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm512_rcp14_pd (__m512d __A)
1670 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1671 (__v8df)
1672 _mm512_undefined_pd (),
1673 (__mmask8) -1);
1676 extern __inline __m512d
1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681 (__v8df) __W,
1682 (__mmask8) __U);
1685 extern __inline __m512d
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1689 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1690 (__v8df)
1691 _mm512_setzero_pd (),
1692 (__mmask8) __U);
1695 extern __inline __m512
1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697 _mm512_rcp14_ps (__m512 __A)
1699 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1700 (__v16sf)
1701 _mm512_undefined_ps (),
1702 (__mmask16) -1);
1705 extern __inline __m512
1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710 (__v16sf) __W,
1711 (__mmask16) __U);
1714 extern __inline __m512
1715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1716 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719 (__v16sf)
1720 _mm512_setzero_ps (),
1721 (__mmask16) __U);
1724 extern __inline __m128d
1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726 _mm_rcp14_sd (__m128d __A, __m128d __B)
1728 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1729 (__v2df) __A);
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1736 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1737 (__v2df) __A,
1738 (__v2df) __W,
1739 (__mmask8) __U);
1742 extern __inline __m128d
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1746 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
1747 (__v2df) __A,
1748 (__v2df) _mm_setzero_ps (),
1749 (__mmask8) __U);
1752 extern __inline __m128
1753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754 _mm_rcp14_ss (__m128 __A, __m128 __B)
1756 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1757 (__v4sf) __A);
1760 extern __inline __m128
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1764 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1765 (__v4sf) __A,
1766 (__v4sf) __W,
1767 (__mmask8) __U);
1770 extern __inline __m128
1771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1774 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
1775 (__v4sf) __A,
1776 (__v4sf) _mm_setzero_ps (),
1777 (__mmask8) __U);
1780 extern __inline __m512d
1781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1782 _mm512_rsqrt14_pd (__m512d __A)
1784 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1785 (__v8df)
1786 _mm512_undefined_pd (),
1787 (__mmask8) -1);
1790 extern __inline __m512d
1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1794 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1795 (__v8df) __W,
1796 (__mmask8) __U);
1799 extern __inline __m512d
1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1803 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1804 (__v8df)
1805 _mm512_setzero_pd (),
1806 (__mmask8) __U);
1809 extern __inline __m512
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _mm512_rsqrt14_ps (__m512 __A)
1813 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1814 (__v16sf)
1815 _mm512_undefined_ps (),
1816 (__mmask16) -1);
1819 extern __inline __m512
1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1821 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1823 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1824 (__v16sf) __W,
1825 (__mmask16) __U);
1828 extern __inline __m512
1829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1830 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1832 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1833 (__v16sf)
1834 _mm512_setzero_ps (),
1835 (__mmask16) __U);
1838 extern __inline __m128d
1839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1840 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1842 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1843 (__v2df) __A);
1846 extern __inline __m128d
1847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1850 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1851 (__v2df) __A,
1852 (__v2df) __W,
1853 (__mmask8) __U);
1856 extern __inline __m128d
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1860 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
1861 (__v2df) __A,
1862 (__v2df) _mm_setzero_pd (),
1863 (__mmask8) __U);
1866 extern __inline __m128
1867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1870 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1871 (__v4sf) __A);
1874 extern __inline __m128
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1878 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1879 (__v4sf) __A,
1880 (__v4sf) __W,
1881 (__mmask8) __U);
1884 extern __inline __m128
1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1888 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
1889 (__v4sf) __A,
1890 (__v4sf) _mm_setzero_ps (),
1891 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Square roots with an explicit rounding mode __R (packed pd/ps and the
   scalar sd/ss forms).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			   const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
						(__v2df) __A,
						__R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
					       (__v4sf) __A,
					       __R);
}
#else
#define _mm512_sqrt_round_pd(A, C)            \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_pd(U, A, C)   \
    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sqrt_round_ps(A, C)            \
    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
    (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_ps(U, A, C)   \
    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_sqrt_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)

#define _mm_sqrt_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_sqrtss_round(A, B, C)
#endif
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_cvtepi8_epi32 (__m128i __A)
2001 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2002 (__v16si)
2003 _mm512_undefined_epi32 (),
2004 (__mmask16) -1);
2007 extern __inline __m512i
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2011 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2012 (__v16si) __W,
2013 (__mmask16) __U);
2016 extern __inline __m512i
2017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
2020 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
2021 (__v16si)
2022 _mm512_setzero_si512 (),
2023 (__mmask16) __U);
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_cvtepi8_epi64 (__m128i __A)
2030 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2031 (__v8di)
2032 _mm512_undefined_epi32 (),
2033 (__mmask8) -1);
2036 extern __inline __m512i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2040 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2041 (__v8di) __W,
2042 (__mmask8) __U);
2045 extern __inline __m512i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2049 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
2050 (__v8di)
2051 _mm512_setzero_si512 (),
2052 (__mmask8) __U);
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_cvtepi16_epi32 (__m256i __A)
2059 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2060 (__v16si)
2061 _mm512_undefined_epi32 (),
2062 (__mmask16) -1);
2065 extern __inline __m512i
2066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2067 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2069 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2070 (__v16si) __W,
2071 (__mmask16) __U);
2074 extern __inline __m512i
2075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2076 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
2078 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
2079 (__v16si)
2080 _mm512_setzero_si512 (),
2081 (__mmask16) __U);
2084 extern __inline __m512i
2085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086 _mm512_cvtepi16_epi64 (__m128i __A)
2088 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2089 (__v8di)
2090 _mm512_undefined_epi32 (),
2091 (__mmask8) -1);
2094 extern __inline __m512i
2095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2096 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2098 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2099 (__v8di) __W,
2100 (__mmask8) __U);
2103 extern __inline __m512i
2104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2105 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2107 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
2108 (__v8di)
2109 _mm512_setzero_si512 (),
2110 (__mmask8) __U);
2113 extern __inline __m512i
2114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115 _mm512_cvtepi32_epi64 (__m256i __X)
2117 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2118 (__v8di)
2119 _mm512_undefined_epi32 (),
2120 (__mmask8) -1);
2123 extern __inline __m512i
2124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2125 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2127 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2128 (__v8di) __W,
2129 (__mmask8) __U);
2132 extern __inline __m512i
2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
2136 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
2137 (__v8di)
2138 _mm512_setzero_si512 (),
2139 (__mmask8) __U);
2142 extern __inline __m512i
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm512_cvtepu8_epi32 (__m128i __A)
2146 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2147 (__v16si)
2148 _mm512_undefined_epi32 (),
2149 (__mmask16) -1);
2152 extern __inline __m512i
2153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
2156 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2157 (__v16si) __W,
2158 (__mmask16) __U);
2161 extern __inline __m512i
2162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2163 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
2165 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
2166 (__v16si)
2167 _mm512_setzero_si512 (),
2168 (__mmask16) __U);
2171 extern __inline __m512i
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_cvtepu8_epi64 (__m128i __A)
2175 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2176 (__v8di)
2177 _mm512_undefined_epi32 (),
2178 (__mmask8) -1);
2181 extern __inline __m512i
2182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2185 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2186 (__v8di) __W,
2187 (__mmask8) __U);
2190 extern __inline __m512i
2191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2194 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
2195 (__v8di)
2196 _mm512_setzero_si512 (),
2197 (__mmask8) __U);
2200 extern __inline __m512i
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm512_cvtepu16_epi32 (__m256i __A)
2204 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2205 (__v16si)
2206 _mm512_undefined_epi32 (),
2207 (__mmask16) -1);
2210 extern __inline __m512i
2211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2212 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2214 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2215 (__v16si) __W,
2216 (__mmask16) __U);
2219 extern __inline __m512i
2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2221 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2223 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2224 (__v16si)
2225 _mm512_setzero_si512 (),
2226 (__mmask16) __U);
2229 extern __inline __m512i
2230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231 _mm512_cvtepu16_epi64 (__m128i __A)
2233 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2234 (__v8di)
2235 _mm512_undefined_epi32 (),
2236 (__mmask8) -1);
2239 extern __inline __m512i
2240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2243 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2244 (__v8di) __W,
2245 (__mmask8) __U);
2248 extern __inline __m512i
2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2252 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2253 (__v8di)
2254 _mm512_setzero_si512 (),
2255 (__mmask8) __U);
2258 extern __inline __m512i
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm512_cvtepu32_epi64 (__m256i __X)
2262 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2263 (__v8di)
2264 _mm512_undefined_epi32 (),
2265 (__mmask8) -1);
2268 extern __inline __m512i
2269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2272 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2273 (__v8di) __W,
2274 (__mmask8) __U);
2277 extern __inline __m512i
2278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2281 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2282 (__v8di)
2283 _mm512_setzero_si512 (),
2284 (__mmask8) __U);
/* Packed add/subtract with an explicit rounding-mode immediate __R
   (one of the _MM_FROUND_* constants).  The rounding operand must be a
   compile-time constant; without optimization the inline functions
   cannot guarantee that, so macro forms are used instead.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
#define _mm512_add_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_add_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_sub_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sub_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
/* Packed and scalar multiply/divide with an explicit rounding-mode
   immediate __R.  As above, macro forms replace the inline functions
   when __OPTIMIZE__ is not defined.  The scalar _sd/_ss forms operate
   on element 0 and copy the upper elements from __A.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
			  __m512d __V, const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
			   const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}
#else
#define _mm512_mul_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_mul_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_div_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_div_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_mul_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_mulsd_round(A, B, C)

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)

#define _mm_maskz_mul_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_mul_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_mulss_round(A, B, C)

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)

#define _mm_maskz_mul_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#define _mm_div_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_divsd_round(A, B, C)

#define _mm_mask_div_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)

#define _mm_maskz_div_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_div_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_divss_round(A, B, C)

#define _mm_mask_div_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)

#define _mm_maskz_div_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
/* Packed max/min with an explicit rounding-mode (SAE) immediate __R.
   Fix: the non-__OPTIMIZE__ macro _mm512_max_round_ps previously used
   _mm512_undefined_pd () as the pass-through operand; the intrinsic is
   single-precision, so it must use _mm512_undefined_ps () like every
   other _ps macro here.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
#define _mm512_max_round_pd(A, B,  R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_pd(U, A,  B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_max_round_ps(A, B,  R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_ps(U, A,  B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)

#define _mm512_min_round_pd(A, B,  R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_pd(U, A,  B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_min_round_ps(A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_ps(U, A,  B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#endif
/* VSCALEFPD/VSCALEFPS/VSCALEFSD/VSCALEFSS: compute __A * 2**floor(__B)
   per element, with an explicit rounding-mode immediate __R.  Macro
   forms replace the inline functions when __OPTIMIZE__ is not
   defined.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			     __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			      const int __R)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			     __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
			      const int __R)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

/* Scalar forms: scale element 0, copy upper elements from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
						  (__v2df) __B,
						  __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
						 (__v4sf) __B,
						 __R);
}
#else
#define _mm512_scalef_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)

#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_scalef_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)

#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_scalef_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_scalefsd_round(A, B, C)

#define _mm_scalef_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_scalefss_round(A, B, C)
#endif
3081 #ifdef __OPTIMIZE__
3082 extern __inline __m512d
3083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3084 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3086 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3087 (__v8df) __B,
3088 (__v8df) __C,
3089 (__mmask8) -1, __R);
3092 extern __inline __m512d
3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3095 __m512d __C, const int __R)
3097 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3098 (__v8df) __B,
3099 (__v8df) __C,
3100 (__mmask8) __U, __R);
3103 extern __inline __m512d
3104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3106 __mmask8 __U, const int __R)
3108 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
3109 (__v8df) __B,
3110 (__v8df) __C,
3111 (__mmask8) __U, __R);
3114 extern __inline __m512d
3115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3116 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3117 __m512d __C, const int __R)
3119 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3120 (__v8df) __B,
3121 (__v8df) __C,
3122 (__mmask8) __U, __R);
3125 extern __inline __m512
3126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3127 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3129 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3130 (__v16sf) __B,
3131 (__v16sf) __C,
3132 (__mmask16) -1, __R);
3135 extern __inline __m512
3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3138 __m512 __C, const int __R)
3140 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3141 (__v16sf) __B,
3142 (__v16sf) __C,
3143 (__mmask16) __U, __R);
3146 extern __inline __m512
3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3149 __mmask16 __U, const int __R)
3151 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
3152 (__v16sf) __B,
3153 (__v16sf) __C,
3154 (__mmask16) __U, __R);
3157 extern __inline __m512
3158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3160 __m512 __C, const int __R)
3162 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3163 (__v16sf) __B,
3164 (__v16sf) __C,
3165 (__mmask16) __U, __R);
3168 extern __inline __m512d
3169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3170 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3172 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3173 (__v8df) __B,
3174 -(__v8df) __C,
3175 (__mmask8) -1, __R);
3178 extern __inline __m512d
3179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3180 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3181 __m512d __C, const int __R)
3183 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3184 (__v8df) __B,
3185 -(__v8df) __C,
3186 (__mmask8) __U, __R);
3189 extern __inline __m512d
3190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3191 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3192 __mmask8 __U, const int __R)
3194 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3195 (__v8df) __B,
3196 (__v8df) __C,
3197 (__mmask8) __U, __R);
3200 extern __inline __m512d
3201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3202 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3203 __m512d __C, const int __R)
3205 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
3206 (__v8df) __B,
3207 -(__v8df) __C,
3208 (__mmask8) __U, __R);
3211 extern __inline __m512
3212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3213 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3215 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3216 (__v16sf) __B,
3217 -(__v16sf) __C,
3218 (__mmask16) -1, __R);
3221 extern __inline __m512
3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3224 __m512 __C, const int __R)
3226 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3227 (__v16sf) __B,
3228 -(__v16sf) __C,
3229 (__mmask16) __U, __R);
3232 extern __inline __m512
3233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3235 __mmask16 __U, const int __R)
3237 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3238 (__v16sf) __B,
3239 (__v16sf) __C,
3240 (__mmask16) __U, __R);
3243 extern __inline __m512
3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3246 __m512 __C, const int __R)
3248 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
3249 (__v16sf) __B,
3250 -(__v16sf) __C,
3251 (__mmask16) __U, __R);
3254 extern __inline __m512d
3255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3258 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3259 (__v8df) __B,
3260 (__v8df) __C,
3261 (__mmask8) -1, __R);
3264 extern __inline __m512d
3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3267 __m512d __C, const int __R)
3269 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3270 (__v8df) __B,
3271 (__v8df) __C,
3272 (__mmask8) __U, __R);
3275 extern __inline __m512d
3276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3278 __mmask8 __U, const int __R)
3280 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3281 (__v8df) __B,
3282 (__v8df) __C,
3283 (__mmask8) __U, __R);
3286 extern __inline __m512d
3287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3288 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3289 __m512d __C, const int __R)
3291 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3292 (__v8df) __B,
3293 (__v8df) __C,
3294 (__mmask8) __U, __R);
3297 extern __inline __m512
3298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3299 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3301 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3302 (__v16sf) __B,
3303 (__v16sf) __C,
3304 (__mmask16) -1, __R);
3307 extern __inline __m512
3308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3309 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3310 __m512 __C, const int __R)
3312 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3313 (__v16sf) __B,
3314 (__v16sf) __C,
3315 (__mmask16) __U, __R);
3318 extern __inline __m512
3319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3320 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3321 __mmask16 __U, const int __R)
3323 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3324 (__v16sf) __B,
3325 (__v16sf) __C,
3326 (__mmask16) __U, __R);
3329 extern __inline __m512
3330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3332 __m512 __C, const int __R)
3334 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3335 (__v16sf) __B,
3336 (__v16sf) __C,
3337 (__mmask16) __U, __R);
3340 extern __inline __m512d
3341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3344 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3345 (__v8df) __B,
3346 -(__v8df) __C,
3347 (__mmask8) -1, __R);
3350 extern __inline __m512d
3351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3353 __m512d __C, const int __R)
3355 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3356 (__v8df) __B,
3357 -(__v8df) __C,
3358 (__mmask8) __U, __R);
3361 extern __inline __m512d
3362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3363 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3364 __mmask8 __U, const int __R)
3366 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3367 (__v8df) __B,
3368 (__v8df) __C,
3369 (__mmask8) __U, __R);
3372 extern __inline __m512d
3373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3374 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3375 __m512d __C, const int __R)
3377 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3378 (__v8df) __B,
3379 -(__v8df) __C,
3380 (__mmask8) __U, __R);
3383 extern __inline __m512
3384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3385 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3387 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3388 (__v16sf) __B,
3389 -(__v16sf) __C,
3390 (__mmask16) -1, __R);
3393 extern __inline __m512
3394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3396 __m512 __C, const int __R)
3398 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3399 (__v16sf) __B,
3400 -(__v16sf) __C,
3401 (__mmask16) __U, __R);
3404 extern __inline __m512
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3407 __mmask16 __U, const int __R)
3409 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3410 (__v16sf) __B,
3411 (__v16sf) __C,
3412 (__mmask16) __U, __R);
3415 extern __inline __m512
3416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3418 __m512 __C, const int __R)
3420 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3421 (__v16sf) __B,
3422 -(__v16sf) __C,
3423 (__mmask16) __U, __R);
3426 extern __inline __m512d
3427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3430 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3431 (__v8df) __B,
3432 (__v8df) __C,
3433 (__mmask8) -1, __R);
3436 extern __inline __m512d
3437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3439 __m512d __C, const int __R)
3441 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3442 (__v8df) __B,
3443 (__v8df) __C,
3444 (__mmask8) __U, __R);
3447 extern __inline __m512d
3448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3450 __mmask8 __U, const int __R)
3452 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3453 (__v8df) __B,
3454 (__v8df) __C,
3455 (__mmask8) __U, __R);
3458 extern __inline __m512d
3459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3461 __m512d __C, const int __R)
3463 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3464 (__v8df) __B,
3465 (__v8df) __C,
3466 (__mmask8) __U, __R);
3469 extern __inline __m512
3470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3471 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3473 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3474 (__v16sf) __B,
3475 (__v16sf) __C,
3476 (__mmask16) -1, __R);
3479 extern __inline __m512
3480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3482 __m512 __C, const int __R)
3484 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3485 (__v16sf) __B,
3486 (__v16sf) __C,
3487 (__mmask16) __U, __R);
3490 extern __inline __m512
3491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3493 __mmask16 __U, const int __R)
3495 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3496 (__v16sf) __B,
3497 (__v16sf) __C,
3498 (__mmask16) __U, __R);
3501 extern __inline __m512
3502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3503 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3504 __m512 __C, const int __R)
3506 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3507 (__v16sf) __B,
3508 (__v16sf) __C,
3509 (__mmask16) __U, __R);
3512 extern __inline __m512d
3513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3514 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3516 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3517 (__v8df) __B,
3518 -(__v8df) __C,
3519 (__mmask8) -1, __R);
3522 extern __inline __m512d
3523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3524 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3525 __m512d __C, const int __R)
3527 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3528 (__v8df) __B,
3529 (__v8df) __C,
3530 (__mmask8) __U, __R);
3533 extern __inline __m512d
3534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3536 __mmask8 __U, const int __R)
3538 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3539 (__v8df) __B,
3540 (__v8df) __C,
3541 (__mmask8) __U, __R);
3544 extern __inline __m512d
3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3547 __m512d __C, const int __R)
3549 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3550 (__v8df) __B,
3551 -(__v8df) __C,
3552 (__mmask8) __U, __R);
3555 extern __inline __m512
3556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3559 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3560 (__v16sf) __B,
3561 -(__v16sf) __C,
3562 (__mmask16) -1, __R);
3565 extern __inline __m512
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3568 __m512 __C, const int __R)
3570 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3571 (__v16sf) __B,
3572 (__v16sf) __C,
3573 (__mmask16) __U, __R);
3576 extern __inline __m512
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3579 __mmask16 __U, const int __R)
3581 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3582 (__v16sf) __B,
3583 (__v16sf) __C,
3584 (__mmask16) __U, __R);
3587 extern __inline __m512
3588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3590 __m512 __C, const int __R)
3592 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3593 (__v16sf) __B,
3594 -(__v16sf) __C,
3595 (__mmask16) __U, __R);
3597 #else
3598 #define _mm512_fmadd_round_pd(A, B, C, R) \
3599 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3601 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3602 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3604 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3605 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3607 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3608 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3610 #define _mm512_fmadd_round_ps(A, B, C, R) \
3611 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3613 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3614 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3616 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3617 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3619 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3620 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3622 #define _mm512_fmsub_round_pd(A, B, C, R) \
3623 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3625 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3626 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3628 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3629 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3631 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3632 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3634 #define _mm512_fmsub_round_ps(A, B, C, R) \
3635 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3637 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3638 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3640 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3641 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3643 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3644 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3646 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3647 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3649 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3650 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3652 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3653 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3655 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3656 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3658 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3659 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3661 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3662 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3664 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3665 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3667 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3668 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3670 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3671 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3673 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3674 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3676 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3677 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3679 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3680 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3682 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3683 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3685 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3686 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3688 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3689 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3691 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3692 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3694 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3695 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3697 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3698 (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R)
3700 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3701 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3703 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3704 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3706 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3707 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3709 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3710 (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R)
3712 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3713 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3715 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3716 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3718 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3719 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3721 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3722 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3724 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3725 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3727 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3728 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3730 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3731 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3733 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3734 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3736 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3737 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3739 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3740 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3741 #endif
3743 extern __inline __m512i
3744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3745 _mm512_abs_epi64 (__m512i __A)
3747 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3748 (__v8di)
3749 _mm512_undefined_epi32 (),
3750 (__mmask8) -1);
3753 extern __inline __m512i
3754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3755 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3757 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3758 (__v8di) __W,
3759 (__mmask8) __U);
3762 extern __inline __m512i
3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3766 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3767 (__v8di)
3768 _mm512_setzero_si512 (),
3769 (__mmask8) __U);
3772 extern __inline __m512i
3773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3774 _mm512_abs_epi32 (__m512i __A)
3776 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3777 (__v16si)
3778 _mm512_undefined_epi32 (),
3779 (__mmask16) -1);
3782 extern __inline __m512i
3783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3784 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3786 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3787 (__v16si) __W,
3788 (__mmask16) __U);
3791 extern __inline __m512i
3792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3795 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3796 (__v16si)
3797 _mm512_setzero_si512 (),
3798 (__mmask16) __U);
3801 extern __inline __m512
3802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3803 _mm512_broadcastss_ps (__m128 __A)
3805 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3806 (__v16sf)
3807 _mm512_undefined_ps (),
3808 (__mmask16) -1);
3811 extern __inline __m512
3812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3813 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3815 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3816 (__v16sf) __O, __M);
3819 extern __inline __m512
3820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3823 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3824 (__v16sf)
3825 _mm512_setzero_ps (),
3826 __M);
3829 extern __inline __m512d
3830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3831 _mm512_broadcastsd_pd (__m128d __A)
3833 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3834 (__v8df)
3835 _mm512_undefined_pd (),
3836 (__mmask8) -1);
3839 extern __inline __m512d
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3843 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3844 (__v8df) __O, __M);
3847 extern __inline __m512d
3848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3849 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3851 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3852 (__v8df)
3853 _mm512_setzero_pd (),
3854 __M);
3857 extern __inline __m512i
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm512_broadcastd_epi32 (__m128i __A)
3861 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3862 (__v16si)
3863 _mm512_undefined_epi32 (),
3864 (__mmask16) -1);
3867 extern __inline __m512i
3868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3871 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3872 (__v16si) __O, __M);
3875 extern __inline __m512i
3876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3877 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3879 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3880 (__v16si)
3881 _mm512_setzero_si512 (),
3882 __M);
3885 extern __inline __m512i
3886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3887 _mm512_set1_epi32 (int __A)
3889 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3890 (__v16si)
3891 _mm512_undefined_epi32 (),
3892 (__mmask16)(-1));
3895 extern __inline __m512i
3896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3899 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3900 __M);
3903 extern __inline __m512i
3904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3905 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3907 return (__m512i)
3908 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3909 (__v16si) _mm512_setzero_si512 (),
3910 __M);
3913 extern __inline __m512i
3914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3915 _mm512_broadcastq_epi64 (__m128i __A)
3917 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3918 (__v8di)
3919 _mm512_undefined_epi32 (),
3920 (__mmask8) -1);
3923 extern __inline __m512i
3924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3927 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3928 (__v8di) __O, __M);
3931 extern __inline __m512i
3932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3933 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3935 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3936 (__v8di)
3937 _mm512_setzero_si512 (),
3938 __M);
3941 extern __inline __m512i
3942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943 _mm512_set1_epi64 (long long __A)
3945 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3946 (__v8di)
3947 _mm512_undefined_epi32 (),
3948 (__mmask8)(-1));
3951 extern __inline __m512i
3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3955 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3956 __M);
3959 extern __inline __m512i
3960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3961 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3963 return (__m512i)
3964 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3965 (__v8di) _mm512_setzero_si512 (),
3966 __M);
3969 extern __inline __m512
3970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3971 _mm512_broadcast_f32x4 (__m128 __A)
3973 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3974 (__v16sf)
3975 _mm512_undefined_ps (),
3976 (__mmask16) -1);
3979 extern __inline __m512
3980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3981 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3983 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3984 (__v16sf) __O,
3985 __M);
3988 extern __inline __m512
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3992 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3993 (__v16sf)
3994 _mm512_setzero_ps (),
3995 __M);
3998 extern __inline __m512i
3999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4000 _mm512_broadcast_i32x4 (__m128i __A)
4002 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4003 (__v16si)
4004 _mm512_undefined_epi32 (),
4005 (__mmask16) -1);
4008 extern __inline __m512i
4009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4010 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
4012 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4013 (__v16si) __O,
4014 __M);
/* 128/256-bit lane broadcasts into a 512-bit vector (AVX-512F).
   Each wrapper forwards to the matching __builtin_ia32_broadcast*_512
   builtin.  The plain form passes an undefined merge source with an
   all-ones mask; the "mask" form merges masked-off elements from __O
   under mask __M; the "maskz" form merges them with zero
   (_mm512_setzero_*).  */
/* Zero-masked broadcast of the 4 ints in __A across the result.  */
4017 extern __inline __m512i
4018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4019 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
4021 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
4022 (__v16si)
4023 _mm512_setzero_si512 (),
4024 __M);
/* Broadcast the 4 doubles in __A to both 256-bit halves (no mask).  */
4027 extern __inline __m512d
4028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029 _mm512_broadcast_f64x4 (__m256d __A)
4031 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4032 (__v8df)
4033 _mm512_undefined_pd (),
4034 (__mmask8) -1);
/* Merge-masked variant: masked-off doubles come from __O.  */
4037 extern __inline __m512d
4038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4039 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
4041 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4042 (__v8df) __O,
4043 __M);
/* Zero-masked variant: masked-off doubles become 0.0.  */
4046 extern __inline __m512d
4047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4048 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
4050 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
4051 (__v8df)
4052 _mm512_setzero_pd (),
4053 __M);
/* Broadcast the 4 long longs in __A to both 256-bit halves (no mask).  */
4056 extern __inline __m512i
4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4058 _mm512_broadcast_i64x4 (__m256i __A)
4060 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4061 (__v8di)
4062 _mm512_undefined_epi32 (),
4063 (__mmask8) -1);
/* Merge-masked variant: masked-off elements come from __O.  */
4066 extern __inline __m512i
4067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4068 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
4070 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4071 (__v8di) __O,
4072 __M);
/* Zero-masked variant: masked-off elements become 0.  */
4075 extern __inline __m512i
4076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4077 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
4079 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
4080 (__v8di)
4081 _mm512_setzero_si512 (),
4082 __M);
/* Immediate selector values for the 4-element shuffles below
   (_mm512_shuffle_epi32 etc.).  Each letter selects a source element,
   A = 0 .. D = 3; the name is read from the most-significant 2-bit
   field down to the least, so _MM_PERM_WXYZ encodes
   (W << 6) | (X << 4) | (Y << 2) | Z — e.g. _MM_PERM_AAAB == 0x01.  */
4085 typedef enum
4087 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
4088 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
4089 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
4090 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
4091 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
4092 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
4093 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
4094 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
4095 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
4096 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
4097 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
4098 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
4099 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
4100 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
4101 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
4102 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
4103 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
4104 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
4105 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
4106 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
4107 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
4108 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
4109 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
4110 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
4111 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
4112 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
4113 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
4114 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
4115 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
4116 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
4117 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
4118 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
4119 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
4120 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
4121 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
4122 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
4123 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
4124 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
4125 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
4126 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
4127 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
4128 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
4129 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
4130 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
4131 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
4132 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
4133 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
4134 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
4135 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
4136 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
4137 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
4138 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
4139 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
4140 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
4141 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
4142 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
4143 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
4144 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
4145 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
4146 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
4147 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
4148 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
4149 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
4150 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
4151 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
4152 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
4153 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
4154 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
4155 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
4156 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
4157 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
4158 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
4159 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
4160 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
4161 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
4162 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
4163 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
4164 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
4165 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
4166 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
4167 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
4168 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
4169 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
4170 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
4171 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
4172 _MM_PERM_DDDD = 0xFF
4173 } _MM_PERM_ENUM;
/* 512-bit shuffle intrinsics: per-128-bit-lane dword shuffle
   (pshufd512) and whole-128-bit-lane shuffles (shuf_{i,f}{32x4,64x2}).
   The builtins require the selector to be a compile-time immediate;
   when not optimizing the inline wrappers cannot guarantee the const
   argument folds to an immediate, so equivalent macro forms are
   provided in the #else branch instead (standard pattern in these
   intrinsics headers).  Masking follows the usual scheme: plain form
   = undefined merge source + all-ones mask, "mask" = merge from __W
   under __U, "maskz" = merge with zero under __U.  */
4175 #ifdef __OPTIMIZE__
/* Shuffle dwords within each 128-bit lane of __A by __mask.  */
4176 extern __inline __m512i
4177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
4180 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4181 __mask,
4182 (__v16si)
4183 _mm512_undefined_epi32 (),
4184 (__mmask16) -1);
4187 extern __inline __m512i
4188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4189 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
4190 _MM_PERM_ENUM __mask)
4192 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4193 __mask,
4194 (__v16si) __W,
4195 (__mmask16) __U);
4198 extern __inline __m512i
4199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4200 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
4202 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
4203 __mask,
4204 (__v16si)
4205 _mm512_setzero_si512 (),
4206 (__mmask16) __U);
/* Select 128-bit lanes (holding 64-bit elements) from __A/__B by __imm.  */
4209 extern __inline __m512i
4210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4211 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
4213 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4214 (__v8di) __B, __imm,
4215 (__v8di)
4216 _mm512_undefined_epi32 (),
4217 (__mmask8) -1);
4220 extern __inline __m512i
4221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4222 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
4223 __m512i __B, const int __imm)
4225 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4226 (__v8di) __B, __imm,
4227 (__v8di) __W,
4228 (__mmask8) __U);
4231 extern __inline __m512i
4232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
4234 const int __imm)
4236 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
4237 (__v8di) __B, __imm,
4238 (__v8di)
4239 _mm512_setzero_si512 (),
4240 (__mmask8) __U);
/* Select 128-bit lanes (holding 32-bit elements) from __A/__B by __imm.  */
4243 extern __inline __m512i
4244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4245 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
4247 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4248 (__v16si) __B,
4249 __imm,
4250 (__v16si)
4251 _mm512_undefined_epi32 (),
4252 (__mmask16) -1);
4255 extern __inline __m512i
4256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4257 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
4258 __m512i __B, const int __imm)
4260 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4261 (__v16si) __B,
4262 __imm,
4263 (__v16si) __W,
4264 (__mmask16) __U);
4267 extern __inline __m512i
4268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
4270 const int __imm)
4272 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
4273 (__v16si) __B,
4274 __imm,
4275 (__v16si)
4276 _mm512_setzero_si512 (),
4277 (__mmask16) __U);
/* Double-precision counterpart of shuffle_i64x2.  */
4280 extern __inline __m512d
4281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4282 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
4284 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4285 (__v8df) __B, __imm,
4286 (__v8df)
4287 _mm512_undefined_pd (),
4288 (__mmask8) -1);
4291 extern __inline __m512d
4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4293 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
4294 __m512d __B, const int __imm)
4296 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4297 (__v8df) __B, __imm,
4298 (__v8df) __W,
4299 (__mmask8) __U);
4302 extern __inline __m512d
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4305 const int __imm)
4307 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4308 (__v8df) __B, __imm,
4309 (__v8df)
4310 _mm512_setzero_pd (),
4311 (__mmask8) __U);
/* Single-precision counterpart of shuffle_i32x4.  */
4314 extern __inline __m512
4315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4318 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4319 (__v16sf) __B, __imm,
4320 (__v16sf)
4321 _mm512_undefined_ps (),
4322 (__mmask16) -1);
4325 extern __inline __m512
4326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4328 __m512 __B, const int __imm)
4330 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4331 (__v16sf) __B, __imm,
4332 (__v16sf) __W,
4333 (__mmask16) __U);
4336 extern __inline __m512
4337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4339 const int __imm)
4341 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4342 (__v16sf) __B, __imm,
4343 (__v16sf)
4344 _mm512_setzero_ps (),
4345 (__mmask16) __U);
4348 #else
/* Non-optimizing fallbacks: same builtins, expressed as macros so the
   selector argument is textually an immediate.  */
4349 #define _mm512_shuffle_epi32(X, C) \
4350 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4351 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4352 (__mmask16)-1))
4354 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4355 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4356 (__v16si)(__m512i)(W),\
4357 (__mmask16)(U)))
4359 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4360 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4361 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4362 (__mmask16)(U)))
4364 #define _mm512_shuffle_i64x2(X, Y, C) \
4365 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4366 (__v8di)(__m512i)(Y), (int)(C),\
4367 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
4368 (__mmask8)-1))
4370 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4371 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4372 (__v8di)(__m512i)(Y), (int)(C),\
4373 (__v8di)(__m512i)(W),\
4374 (__mmask8)(U)))
4376 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4377 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4378 (__v8di)(__m512i)(Y), (int)(C),\
4379 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4380 (__mmask8)(U)))
4382 #define _mm512_shuffle_i32x4(X, Y, C) \
4383 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4384 (__v16si)(__m512i)(Y), (int)(C),\
4385 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4386 (__mmask16)-1))
4388 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4389 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4390 (__v16si)(__m512i)(Y), (int)(C),\
4391 (__v16si)(__m512i)(W),\
4392 (__mmask16)(U)))
4394 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4395 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4396 (__v16si)(__m512i)(Y), (int)(C),\
4397 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4398 (__mmask16)(U)))
4400 #define _mm512_shuffle_f64x2(X, Y, C) \
4401 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4402 (__v8df)(__m512d)(Y), (int)(C),\
4403 (__v8df)(__m512d)_mm512_undefined_pd(),\
4404 (__mmask8)-1))
4406 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4407 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4408 (__v8df)(__m512d)(Y), (int)(C),\
4409 (__v8df)(__m512d)(W),\
4410 (__mmask8)(U)))
4412 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4413 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4414 (__v8df)(__m512d)(Y), (int)(C),\
4415 (__v8df)(__m512d)_mm512_setzero_pd(),\
4416 (__mmask8)(U)))
4418 #define _mm512_shuffle_f32x4(X, Y, C) \
4419 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4420 (__v16sf)(__m512)(Y), (int)(C),\
4421 (__v16sf)(__m512)_mm512_undefined_ps(),\
4422 (__mmask16)-1))
4424 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4425 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4426 (__v16sf)(__m512)(Y), (int)(C),\
4427 (__v16sf)(__m512)(W),\
4428 (__mmask16)(U)))
4430 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4431 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4432 (__v16sf)(__m512)(Y), (int)(C),\
4433 (__v16sf)(__m512)_mm512_setzero_ps(),\
4434 (__mmask16)(U)))
4435 #endif
/* Variable-count rotates (AVX-512F): rotate each 32-/64-bit element of
   __A left (prolv*) or right (prorv*) by the count in the matching
   element of __B.  Plain / mask / maskz variants follow the usual
   merge scheme (undefined, __W under __U, zero under __U).  */
4437 extern __inline __m512i
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4441 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4442 (__v16si) __B,
4443 (__v16si)
4444 _mm512_undefined_epi32 (),
4445 (__mmask16) -1);
4448 extern __inline __m512i
4449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4450 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4452 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4453 (__v16si) __B,
4454 (__v16si) __W,
4455 (__mmask16) __U);
4458 extern __inline __m512i
4459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4462 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4463 (__v16si) __B,
4464 (__v16si)
4465 _mm512_setzero_si512 (),
4466 (__mmask16) __U);
/* Right-rotate 32-bit elements.  */
4469 extern __inline __m512i
4470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4473 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4474 (__v16si) __B,
4475 (__v16si)
4476 _mm512_undefined_epi32 (),
4477 (__mmask16) -1);
4480 extern __inline __m512i
4481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4484 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4485 (__v16si) __B,
4486 (__v16si) __W,
4487 (__mmask16) __U);
4490 extern __inline __m512i
4491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4494 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4495 (__v16si) __B,
4496 (__v16si)
4497 _mm512_setzero_si512 (),
4498 (__mmask16) __U);
/* Left-rotate 64-bit elements.  */
4501 extern __inline __m512i
4502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4505 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4506 (__v8di) __B,
4507 (__v8di)
4508 _mm512_undefined_epi32 (),
4509 (__mmask8) -1);
4512 extern __inline __m512i
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4516 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4517 (__v8di) __B,
4518 (__v8di) __W,
4519 (__mmask8) __U);
4522 extern __inline __m512i
4523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4526 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4527 (__v8di) __B,
4528 (__v8di)
4529 _mm512_setzero_si512 (),
4530 (__mmask8) __U);
/* Right-rotate 64-bit elements.  */
4533 extern __inline __m512i
4534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4535 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4537 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4538 (__v8di) __B,
4539 (__v8di)
4540 _mm512_undefined_epi32 (),
4541 (__mmask8) -1);
4544 extern __inline __m512i
4545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4546 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4548 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4549 (__v8di) __B,
4550 (__v8di) __W,
4551 (__mmask8) __U);
4554 extern __inline __m512i
4555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4556 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4558 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4559 (__v8di) __B,
4560 (__v8di)
4561 _mm512_setzero_si512 (),
4562 (__mmask8) __U);
/* Truncating (round-toward-zero) conversions of 8 doubles to 8
   signed/unsigned 32-bit ints, with explicit SAE/rounding operand __R.
   __R must be a compile-time constant, hence the macro fallbacks when
   not optimizing.  */
4565 #ifdef __OPTIMIZE__
4566 extern __inline __m256i
4567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4568 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4570 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4571 (__v8si)
4572 _mm256_undefined_si256 (),
4573 (__mmask8) -1, __R);
4576 extern __inline __m256i
4577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4579 const int __R)
4581 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4582 (__v8si) __W,
4583 (__mmask8) __U, __R);
4586 extern __inline __m256i
4587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4588 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4590 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4591 (__v8si)
4592 _mm256_setzero_si256 (),
4593 (__mmask8) __U, __R);
/* Unsigned-destination variants (cvttpd2udq).  */
4596 extern __inline __m256i
4597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4601 (__v8si)
4602 _mm256_undefined_si256 (),
4603 (__mmask8) -1, __R);
4606 extern __inline __m256i
4607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4608 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4609 const int __R)
4611 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4612 (__v8si) __W,
4613 (__mmask8) __U, __R);
4616 extern __inline __m256i
4617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4618 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4620 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4621 (__v8si)
4622 _mm256_setzero_si256 (),
4623 (__mmask8) __U, __R);
4625 #else
4626 #define _mm512_cvtt_roundpd_epi32(A, B) \
4627 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4629 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4630 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4632 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4633 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4635 #define _mm512_cvtt_roundpd_epu32(A, B) \
4636 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4638 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4639 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4641 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4642 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4643 #endif
/* Rounding conversions of 8 doubles to 8 signed/unsigned 32-bit ints
   using the rounding mode given by __R (cvtpd2dq / cvtpd2udq).
   __R must be an immediate; macro fallbacks below for -O0.  */
4645 #ifdef __OPTIMIZE__
4646 extern __inline __m256i
4647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4648 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4650 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4651 (__v8si)
4652 _mm256_undefined_si256 (),
4653 (__mmask8) -1, __R);
4656 extern __inline __m256i
4657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4658 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4659 const int __R)
4661 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4662 (__v8si) __W,
4663 (__mmask8) __U, __R);
4666 extern __inline __m256i
4667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4668 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4670 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4671 (__v8si)
4672 _mm256_setzero_si256 (),
4673 (__mmask8) __U, __R);
/* Unsigned-destination variants (cvtpd2udq).  */
4676 extern __inline __m256i
4677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4678 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4680 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4681 (__v8si)
4682 _mm256_undefined_si256 (),
4683 (__mmask8) -1, __R);
4686 extern __inline __m256i
4687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4688 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4689 const int __R)
4691 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4692 (__v8si) __W,
4693 (__mmask8) __U, __R);
4696 extern __inline __m256i
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4700 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4701 (__v8si)
4702 _mm256_setzero_si256 (),
4703 (__mmask8) __U, __R);
4705 #else
4706 #define _mm512_cvt_roundpd_epi32(A, B) \
4707 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4709 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4710 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4712 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4713 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4715 #define _mm512_cvt_roundpd_epu32(A, B) \
4716 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4718 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4719 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4721 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4722 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4723 #endif
/* Truncating conversions of 16 floats to 16 signed/unsigned 32-bit
   ints with explicit SAE/rounding operand __R (cvttps2dq /
   cvttps2udq).  __R must be an immediate; macro fallbacks for -O0.  */
4725 #ifdef __OPTIMIZE__
4726 extern __inline __m512i
4727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4730 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4731 (__v16si)
4732 _mm512_undefined_epi32 (),
4733 (__mmask16) -1, __R);
4736 extern __inline __m512i
4737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4738 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4739 const int __R)
4741 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4742 (__v16si) __W,
4743 (__mmask16) __U, __R);
4746 extern __inline __m512i
4747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4750 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4751 (__v16si)
4752 _mm512_setzero_si512 (),
4753 (__mmask16) __U, __R);
/* Unsigned-destination variants (cvttps2udq).  */
4756 extern __inline __m512i
4757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4758 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4760 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4761 (__v16si)
4762 _mm512_undefined_epi32 (),
4763 (__mmask16) -1, __R);
4766 extern __inline __m512i
4767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4768 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4769 const int __R)
4771 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4772 (__v16si) __W,
4773 (__mmask16) __U, __R);
4776 extern __inline __m512i
4777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4778 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4780 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4781 (__v16si)
4782 _mm512_setzero_si512 (),
4783 (__mmask16) __U, __R);
4785 #else
4786 #define _mm512_cvtt_roundps_epi32(A, B) \
4787 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4789 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4790 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4792 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4793 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4795 #define _mm512_cvtt_roundps_epu32(A, B) \
4796 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4798 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4799 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4801 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4802 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4803 #endif
/* Rounding conversions of 16 floats to 16 signed/unsigned 32-bit ints
   using the rounding mode __R (cvtps2dq / cvtps2udq).  __R must be an
   immediate; macro fallbacks below for -O0.  */
4805 #ifdef __OPTIMIZE__
4806 extern __inline __m512i
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4810 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4811 (__v16si)
4812 _mm512_undefined_epi32 (),
4813 (__mmask16) -1, __R);
4816 extern __inline __m512i
4817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4819 const int __R)
4821 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4822 (__v16si) __W,
4823 (__mmask16) __U, __R);
4826 extern __inline __m512i
4827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4828 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4830 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4831 (__v16si)
4832 _mm512_setzero_si512 (),
4833 (__mmask16) __U, __R);
/* Unsigned-destination variants (cvtps2udq).  */
4836 extern __inline __m512i
4837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4840 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4841 (__v16si)
4842 _mm512_undefined_epi32 (),
4843 (__mmask16) -1, __R);
4846 extern __inline __m512i
4847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4848 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4849 const int __R)
4851 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4852 (__v16si) __W,
4853 (__mmask16) __U, __R);
4856 extern __inline __m512i
4857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4860 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4861 (__v16si)
4862 _mm512_setzero_si512 (),
4863 (__mmask16) __U, __R);
4865 #else
4866 #define _mm512_cvt_roundps_epi32(A, B) \
4867 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4869 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4870 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4872 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4873 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4875 #define _mm512_cvt_roundps_epu32(A, B) \
4876 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4878 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4879 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4881 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4882 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4883 #endif
/* Convert unsigned 32-bit __B to double in the low element of __A,
   upper element passed through (vcvtusi2sd).  No rounding operand:
   a 32-bit unsigned value is always exactly representable.  */
4885 extern __inline __m128d
4886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4887 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4889 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
/* 64-bit integer -> scalar double conversions with explicit rounding
   operand __R; only available in 64-bit mode (the builtins take a
   64-bit GPR).  _mm_cvt_roundi64_sd and _mm_cvt_roundsi64_sd are
   aliases for the same signed conversion.  __R must be an immediate,
   hence the -O0 macro forms.  */
4892 #ifdef __x86_64__
4893 #ifdef __OPTIMIZE__
4894 extern __inline __m128d
4895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4898 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4901 extern __inline __m128d
4902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4905 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4908 extern __inline __m128d
4909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4910 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4912 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4914 #else
4915 #define _mm_cvt_roundu64_sd(A, B, C) \
4916 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
4918 #define _mm_cvt_roundi64_sd(A, B, C) \
4919 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4921 #define _mm_cvt_roundsi64_sd(A, B, C) \
4922 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4923 #endif
4925 #endif
/* 32-bit integer -> scalar float conversions with explicit rounding
   operand __R.  _mm_cvt_roundi32_ss and _mm_cvt_roundsi32_ss are
   aliases for the same signed conversion.  __R must be an immediate,
   hence the -O0 macro forms.  */
4927 #ifdef __OPTIMIZE__
4928 extern __inline __m128
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4932 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4935 extern __inline __m128
4936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4937 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4939 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4942 extern __inline __m128
4943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4946 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4948 #else
4949 #define _mm_cvt_roundu32_ss(A, B, C) \
4950 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
4952 #define _mm_cvt_roundi32_ss(A, B, C) \
4953 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4955 #define _mm_cvt_roundsi32_ss(A, B, C) \
4956 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4957 #endif
/* 64-bit integer -> scalar float conversions with explicit rounding
   operand __R; 64-bit mode only.  _mm_cvt_roundi64_ss and
   _mm_cvt_roundsi64_ss are aliases for the same signed conversion.  */
4959 #ifdef __x86_64__
4960 #ifdef __OPTIMIZE__
4961 extern __inline __m128
4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4965 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4968 extern __inline __m128
4969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4970 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4972 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4975 extern __inline __m128
4976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4979 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4981 #else
4982 #define _mm_cvt_roundu64_ss(A, B, C) \
4983 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
4985 #define _mm_cvt_roundi64_ss(A, B, C) \
4986 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4988 #define _mm_cvt_roundsi64_ss(A, B, C) \
4989 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4990 #endif
4992 #endif
4994 extern __inline __m128i
4995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996 _mm512_cvtepi32_epi8 (__m512i __A)
4998 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4999 (__v16qi)
5000 _mm_undefined_si128 (),
5001 (__mmask16) -1);
5004 extern __inline void
5005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5008 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5011 extern __inline __m128i
5012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5013 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5015 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5016 (__v16qi) __O, __M);
5019 extern __inline __m128i
5020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
5023 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
5024 (__v16qi)
5025 _mm_setzero_si128 (),
5026 __M);
5029 extern __inline __m128i
5030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5031 _mm512_cvtsepi32_epi8 (__m512i __A)
5033 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5034 (__v16qi)
5035 _mm_undefined_si128 (),
5036 (__mmask16) -1);
5039 extern __inline void
5040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5041 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5043 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5046 extern __inline __m128i
5047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5050 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5051 (__v16qi) __O, __M);
5054 extern __inline __m128i
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
5058 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
5059 (__v16qi)
5060 _mm_setzero_si128 (),
5061 __M);
5064 extern __inline __m128i
5065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5066 _mm512_cvtusepi32_epi8 (__m512i __A)
5068 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5069 (__v16qi)
5070 _mm_undefined_si128 (),
5071 (__mmask16) -1);
5074 extern __inline void
5075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
5078 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
5081 extern __inline __m128i
5082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
5085 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5086 (__v16qi) __O,
5087 __M);
5090 extern __inline __m128i
5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
5094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
5095 (__v16qi)
5096 _mm_setzero_si128 (),
5097 __M);
5100 extern __inline __m256i
5101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102 _mm512_cvtepi32_epi16 (__m512i __A)
5104 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5105 (__v16hi)
5106 _mm256_undefined_si256 (),
5107 (__mmask16) -1);
5110 extern __inline void
5111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5112 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
5114 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
5117 extern __inline __m256i
5118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5119 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5121 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5122 (__v16hi) __O, __M);
5125 extern __inline __m256i
5126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5127 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
5129 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
5130 (__v16hi)
5131 _mm256_setzero_si256 (),
5132 __M);
5135 extern __inline __m256i
5136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5137 _mm512_cvtsepi32_epi16 (__m512i __A)
5139 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5140 (__v16hi)
5141 _mm256_undefined_si256 (),
5142 (__mmask16) -1);
5145 extern __inline void
5146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5147 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5149 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5152 extern __inline __m256i
5153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5154 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5156 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5157 (__v16hi) __O, __M);
5160 extern __inline __m256i
5161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
5164 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
5165 (__v16hi)
5166 _mm256_setzero_si256 (),
5167 __M);
5170 extern __inline __m256i
5171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5172 _mm512_cvtusepi32_epi16 (__m512i __A)
5174 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5175 (__v16hi)
5176 _mm256_undefined_si256 (),
5177 (__mmask16) -1);
5180 extern __inline void
5181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5182 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
5184 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
5187 extern __inline __m256i
5188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5189 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
5191 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5192 (__v16hi) __O,
5193 __M);
5196 extern __inline __m256i
5197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5198 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
5200 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
5201 (__v16hi)
5202 _mm256_setzero_si256 (),
5203 __M);
5206 extern __inline __m256i
5207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208 _mm512_cvtepi64_epi32 (__m512i __A)
5210 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5211 (__v8si)
5212 _mm256_undefined_si256 (),
5213 (__mmask8) -1);
5216 extern __inline void
5217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5218 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5220 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5223 extern __inline __m256i
5224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5227 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5228 (__v8si) __O, __M);
5231 extern __inline __m256i
5232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5233 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
5235 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
5236 (__v8si)
5237 _mm256_setzero_si256 (),
5238 __M);
5241 extern __inline __m256i
5242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5243 _mm512_cvtsepi64_epi32 (__m512i __A)
5245 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5246 (__v8si)
5247 _mm256_undefined_si256 (),
5248 (__mmask8) -1);
5251 extern __inline void
5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
5255 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
5258 extern __inline __m256i
5259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5260 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5262 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5263 (__v8si) __O, __M);
5266 extern __inline __m256i
5267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5268 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
5270 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
5271 (__v8si)
5272 _mm256_setzero_si256 (),
5273 __M);
5276 extern __inline __m256i
5277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5278 _mm512_cvtusepi64_epi32 (__m512i __A)
5280 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5281 (__v8si)
5282 _mm256_undefined_si256 (),
5283 (__mmask8) -1);
5286 extern __inline void
5287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5288 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
5290 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
5293 extern __inline __m256i
5294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
5297 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5298 (__v8si) __O, __M);
5301 extern __inline __m256i
5302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5303 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5305 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5306 (__v8si)
5307 _mm256_setzero_si256 (),
5308 __M);
5311 extern __inline __m128i
5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313 _mm512_cvtepi64_epi16 (__m512i __A)
5315 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5316 (__v8hi)
5317 _mm_undefined_si128 (),
5318 (__mmask8) -1);
5321 extern __inline void
5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5323 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5325 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5328 extern __inline __m128i
5329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5330 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5332 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5333 (__v8hi) __O, __M);
5336 extern __inline __m128i
5337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5340 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5341 (__v8hi)
5342 _mm_setzero_si128 (),
5343 __M);
5346 extern __inline __m128i
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_cvtsepi64_epi16 (__m512i __A)
5350 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5351 (__v8hi)
5352 _mm_undefined_si128 (),
5353 (__mmask8) -1);
5356 extern __inline void
5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5358 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5360 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5363 extern __inline __m128i
5364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5365 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5367 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5368 (__v8hi) __O, __M);
5371 extern __inline __m128i
5372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5375 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5376 (__v8hi)
5377 _mm_setzero_si128 (),
5378 __M);
5381 extern __inline __m128i
5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383 _mm512_cvtusepi64_epi16 (__m512i __A)
5385 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5386 (__v8hi)
5387 _mm_undefined_si128 (),
5388 (__mmask8) -1);
5391 extern __inline void
5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5395 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5398 extern __inline __m128i
5399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5402 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5403 (__v8hi) __O, __M);
5406 extern __inline __m128i
5407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5408 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5410 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5411 (__v8hi)
5412 _mm_setzero_si128 (),
5413 __M);
5416 extern __inline __m128i
5417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5418 _mm512_cvtepi64_epi8 (__m512i __A)
5420 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5421 (__v16qi)
5422 _mm_undefined_si128 (),
5423 (__mmask8) -1);
5426 extern __inline void
5427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5428 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5430 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5433 extern __inline __m128i
5434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5437 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5438 (__v16qi) __O, __M);
5441 extern __inline __m128i
5442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5443 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5445 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5446 (__v16qi)
5447 _mm_setzero_si128 (),
5448 __M);
5451 extern __inline __m128i
5452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453 _mm512_cvtsepi64_epi8 (__m512i __A)
5455 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5456 (__v16qi)
5457 _mm_undefined_si128 (),
5458 (__mmask8) -1);
5461 extern __inline void
5462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5463 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5465 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5468 extern __inline __m128i
5469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5470 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5472 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5473 (__v16qi) __O, __M);
5476 extern __inline __m128i
5477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5478 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5480 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5481 (__v16qi)
5482 _mm_setzero_si128 (),
5483 __M);
5486 extern __inline __m128i
5487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488 _mm512_cvtusepi64_epi8 (__m512i __A)
5490 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5491 (__v16qi)
5492 _mm_undefined_si128 (),
5493 (__mmask8) -1);
5496 extern __inline void
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5500 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5503 extern __inline __m128i
5504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5505 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5507 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5508 (__v16qi) __O,
5509 __M);
5512 extern __inline __m128i
5513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5514 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5516 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5517 (__v16qi)
5518 _mm_setzero_si128 (),
5519 __M);
5522 extern __inline __m512d
5523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5524 _mm512_cvtepi32_pd (__m256i __A)
5526 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5527 (__v8df)
5528 _mm512_undefined_pd (),
5529 (__mmask8) -1);
5532 extern __inline __m512d
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5536 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5537 (__v8df) __W,
5538 (__mmask8) __U);
5541 extern __inline __m512d
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5545 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5546 (__v8df)
5547 _mm512_setzero_pd (),
5548 (__mmask8) __U);
5551 extern __inline __m512d
5552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553 _mm512_cvtepu32_pd (__m256i __A)
5555 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5556 (__v8df)
5557 _mm512_undefined_pd (),
5558 (__mmask8) -1);
5561 extern __inline __m512d
5562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5563 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5565 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5566 (__v8df) __W,
5567 (__mmask8) __U);
5570 extern __inline __m512d
5571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5572 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5574 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5575 (__v8df)
5576 _mm512_setzero_pd (),
5577 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* VCVTDQ2PS: convert sixteen signed 32-bit integers to floats using
   rounding mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

/* Merge-masked variant: lanes with a zero bit in __U come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

/* Zero-masked variant: lanes with a zero bit in __U are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

/* VCVTUDQ2PS: same as above but the source integers are unsigned.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}
#else
/* -O0 fallback: expand directly to the builtins.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundepu32_ps(A, B) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#endif
#ifdef __OPTIMIZE__
/* VEXTRACTF64X4: extract 256-bit lane __imm (0 or 1) of __A as four
   doubles.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf64x4_pd (__m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df)
						     _mm256_undefined_pd (),
						     (__mmask8) -1);
}

/* Merge-masked variant: lanes with a zero bit in __U come from __W.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
			     const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df) __W,
						     (__mmask8) __U);
}

/* Zero-masked variant: lanes with a zero bit in __U are zeroed.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}

/* VEXTRACTF32X4: extract 128-bit lane __imm (0..3) of __A as four
   floats.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf32x4_ps (__m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf)
						    _mm_undefined_ps (),
						    (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
			     const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

/* VEXTRACTI64X4: integer counterpart of _mm512_extractf64x4_pd.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di) __W,
						     (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* VEXTRACTI32X4: integer counterpart of _mm512_extractf32x4_ps.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si)
						     _mm_undefined_si128 (),
						     (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si) __W,
						     (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
#else
/* -O0 fallback: expand directly to the builtins so the lane index is
   still passed as a literal constant.  */
#define _mm512_extractf64x4_pd(X, C)                                    \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)_mm256_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)_mm256_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_extractf32x4_ps(X, C)                                    \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)_mm_undefined_ps(),\
    (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)_mm_setzero_ps(),\
    (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(X, C)                                 \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)_mm256_undefined_si256 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)_mm256_setzero_si256 (),\
    (__mmask8)(U)))

#define _mm512_extracti32x4_epi32(X, C)                                 \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)_mm_undefined_si128 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)_mm_setzero_si128 (),\
    (__mmask8)(U)))
#endif
#ifdef __OPTIMIZE__
/* VINSERTI32X4: replace 128-bit lane __imm of __A with __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
						    (__v4si) __B,
						    __imm,
						    (__v16si) __A, -1);
}

/* VINSERTF32X4: replace 128-bit lane __imm of __A with __B.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
						   (__v4sf) __B,
						   __imm,
						   (__v16sf) __A, -1);
}

/* VINSERTI64X4: replace 256-bit lane __imm (0 or 1) of __A with __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B,
						    __imm,
						    (__v8di)
						    _mm512_undefined_epi32 (),
						    (__mmask8) -1);
}

/* Merge-masked variant: lanes with a zero bit in __U come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
			 __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B,
						    __imm,
						    (__v8di) __W,
						    (__mmask8) __U);
}

/* Zero-masked variant: lanes with a zero bit in __U are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B,
						    __imm,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) __U);
}

/* VINSERTF64X4: floating-point counterpart of _mm512_inserti64x4.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B,
						    __imm,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
			 __m256d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B,
						    __imm,
						    (__v8df) __W,
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
			  const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B,
						    __imm,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U);
}
#else
/* -O0 fallback: expand directly to the builtins.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))

#define _mm512_insertf64x4(X, Y, C)                                     \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
    (__v4df)(__m256d) (Y), (int) (C),                                   \
    (__v8df)(__m512d)_mm512_undefined_pd(),                             \
    (__mmask8)-1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
    (__v4df)(__m256d) (Y), (int) (C),                                   \
    (__v8df)(__m512d)(W),                                               \
    (__mmask8)(U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
    (__v4df)(__m256d) (Y), (int) (C),                                   \
    (__v8df)(__m512d)_mm512_setzero_pd(),                               \
    (__mmask8)(U)))

#define _mm512_inserti64x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
    (__v4di)(__m256i) (Y), (int) (C),                                   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),                         \
    (__mmask8)-1))

#define _mm512_mask_inserti64x4(W, U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
    (__v4di)(__m256i) (Y), (int) (C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_inserti64x4(U, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
    (__v4di)(__m256i) (Y), (int) (C),                                   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),                           \
    (__mmask8)(U)))
#endif
6008 extern __inline __m512d
6009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010 _mm512_loadu_pd (void const *__P)
6012 return *(__m512d_u *)__P;
6015 extern __inline __m512d
6016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6017 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
6019 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6020 (__v8df) __W,
6021 (__mmask8) __U);
6024 extern __inline __m512d
6025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6026 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
6028 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
6029 (__v8df)
6030 _mm512_setzero_pd (),
6031 (__mmask8) __U);
6034 extern __inline void
6035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6036 _mm512_storeu_pd (void *__P, __m512d __A)
6038 *(__m512d_u *)__P = __A;
6041 extern __inline void
6042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6043 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
6045 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
6046 (__mmask8) __U);
6049 extern __inline __m512
6050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 _mm512_loadu_ps (void const *__P)
6053 return *(__m512_u *)__P;
6056 extern __inline __m512
6057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6058 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
6060 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6061 (__v16sf) __W,
6062 (__mmask16) __U);
6065 extern __inline __m512
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
6069 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
6070 (__v16sf)
6071 _mm512_setzero_ps (),
6072 (__mmask16) __U);
6075 extern __inline void
6076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077 _mm512_storeu_ps (void *__P, __m512 __A)
6079 *(__m512_u *)__P = __A;
6082 extern __inline void
6083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
6086 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
6087 (__mmask16) __U);
6090 extern __inline __m512i
6091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
6094 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6095 (__v8di) __W,
6096 (__mmask8) __U);
6099 extern __inline __m512i
6100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6101 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6103 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
6104 (__v8di)
6105 _mm512_setzero_si512 (),
6106 (__mmask8) __U);
6109 extern __inline void
6110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6111 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
6113 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
6114 (__mmask8) __U);
6117 extern __inline __m512i
6118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6119 _mm512_loadu_si512 (void const *__P)
6121 return *(__m512i_u *)__P;
6124 extern __inline __m512i
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
6128 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6129 (__v16si) __W,
6130 (__mmask16) __U);
6133 extern __inline __m512i
6134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
6137 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
6138 (__v16si)
6139 _mm512_setzero_si512 (),
6140 (__mmask16) __U);
6143 extern __inline void
6144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6145 _mm512_storeu_si512 (void *__P, __m512i __A)
6147 *(__m512i_u *)__P = __A;
6150 extern __inline void
6151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
6154 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
6155 (__mmask16) __U);
6158 extern __inline __m512d
6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160 _mm512_permutevar_pd (__m512d __A, __m512i __C)
6162 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6163 (__v8di) __C,
6164 (__v8df)
6165 _mm512_undefined_pd (),
6166 (__mmask8) -1);
6169 extern __inline __m512d
6170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6173 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6174 (__v8di) __C,
6175 (__v8df) __W,
6176 (__mmask8) __U);
6179 extern __inline __m512d
6180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6181 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6183 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6184 (__v8di) __C,
6185 (__v8df)
6186 _mm512_setzero_pd (),
6187 (__mmask8) __U);
6190 extern __inline __m512
6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6192 _mm512_permutevar_ps (__m512 __A, __m512i __C)
6194 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6195 (__v16si) __C,
6196 (__v16sf)
6197 _mm512_undefined_ps (),
6198 (__mmask16) -1);
6201 extern __inline __m512
6202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6203 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6205 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6206 (__v16si) __C,
6207 (__v16sf) __W,
6208 (__mmask16) __U);
6211 extern __inline __m512
6212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6215 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6216 (__v16si) __C,
6217 (__v16sf)
6218 _mm512_setzero_ps (),
6219 (__mmask16) __U);
6222 extern __inline __m512i
6223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6224 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
6226 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6227 /* idx */ ,
6228 (__v8di) __A,
6229 (__v8di) __B,
6230 (__mmask8) -1);
6233 extern __inline __m512i
6234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6235 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
6236 __m512i __B)
6238 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
6239 /* idx */ ,
6240 (__v8di) __A,
6241 (__v8di) __B,
6242 (__mmask8) __U);
6245 extern __inline __m512i
6246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6247 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6248 __mmask8 __U, __m512i __B)
6250 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6251 (__v8di) __I
6252 /* idx */ ,
6253 (__v8di) __B,
6254 (__mmask8) __U);
6257 extern __inline __m512i
6258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
6260 __m512i __I, __m512i __B)
6262 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
6263 /* idx */ ,
6264 (__v8di) __A,
6265 (__v8di) __B,
6266 (__mmask8) __U);
6269 extern __inline __m512i
6270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6271 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
6273 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6274 /* idx */ ,
6275 (__v16si) __A,
6276 (__v16si) __B,
6277 (__mmask16) -1);
6280 extern __inline __m512i
6281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
6283 __m512i __I, __m512i __B)
6285 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
6286 /* idx */ ,
6287 (__v16si) __A,
6288 (__v16si) __B,
6289 (__mmask16) __U);
6292 extern __inline __m512i
6293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6295 __mmask16 __U, __m512i __B)
6297 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6298 (__v16si) __I
6299 /* idx */ ,
6300 (__v16si) __B,
6301 (__mmask16) __U);
6304 extern __inline __m512i
6305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6306 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6307 __m512i __I, __m512i __B)
6309 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6310 /* idx */ ,
6311 (__v16si) __A,
6312 (__v16si) __B,
6313 (__mmask16) __U);
6316 extern __inline __m512d
6317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6318 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6320 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6321 /* idx */ ,
6322 (__v8df) __A,
6323 (__v8df) __B,
6324 (__mmask8) -1);
6327 extern __inline __m512d
6328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6329 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6330 __m512d __B)
6332 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6333 /* idx */ ,
6334 (__v8df) __A,
6335 (__v8df) __B,
6336 (__mmask8) __U);
6339 extern __inline __m512d
6340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6342 __m512d __B)
6344 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6345 (__v8di) __I
6346 /* idx */ ,
6347 (__v8df) __B,
6348 (__mmask8) __U);
6351 extern __inline __m512d
6352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6354 __m512d __B)
6356 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6357 /* idx */ ,
6358 (__v8df) __A,
6359 (__v8df) __B,
6360 (__mmask8) __U);
6363 extern __inline __m512
6364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6365 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6367 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6368 /* idx */ ,
6369 (__v16sf) __A,
6370 (__v16sf) __B,
6371 (__mmask16) -1);
6374 extern __inline __m512
6375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6378 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6379 /* idx */ ,
6380 (__v16sf) __A,
6381 (__v16sf) __B,
6382 (__mmask16) __U);
6385 extern __inline __m512
6386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6387 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6388 __m512 __B)
6390 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6391 (__v16si) __I
6392 /* idx */ ,
6393 (__v16sf) __B,
6394 (__mmask16) __U);
6397 extern __inline __m512
6398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6399 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6400 __m512 __B)
6402 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6403 /* idx */ ,
6404 (__v16sf) __A,
6405 (__v16sf) __B,
6406 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* In-lane shuffle of doubles selected by the immediate __C
   (VPERMILPD with an immediate control).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permute_pd (__m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

/* Merge-masking variant: unselected elements come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df) __W,
						     (__mmask8) __U);
}

/* Zero-masking variant: unselected elements are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

/* In-lane shuffle of floats selected by the immediate __C
   (VPERMILPS with an immediate control).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permute_ps (__m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1);
}

/* Merge-masking variant: unselected elements come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

/* Zero-masking variant: unselected elements are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}
#else
/* Macro fallbacks: without optimization the immediate must be passed
   to the builtin as a literal.  */
#define _mm512_permute_pd(X, C)						\
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(),				\
    (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C)				\
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)(W),						\
    (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C)				\
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(),				\
    (__mmask8)(U)))

#define _mm512_permute_ps(X, C)						\
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(),				\
    (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C)				\
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)(W),						\
    (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C)				\
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(),				\
    (__mmask16)(U)))
#endif
#ifdef __OPTIMIZE__
/* Permute 64-bit elements across the whole vector according to the
   immediate __I (VPERMQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_epi64 (__m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) (-1));
}

/* Merge-masking variant: unselected elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
			    __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di) __W,
						  (__mmask8) __M);
}

/* Zero-masking variant: unselected elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __M);
}

/* Permute doubles across the whole vector according to the immediate
   __M (VPERMPD).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_pd (__m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

/* Merge-masking variant: unselected elements come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df) __W,
						  (__mmask8) __U);
}

/* Zero-masking variant: unselected elements are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}
#else
/* Macro fallbacks: without optimization the immediate must be passed
   to the builtin as a literal.  */
#define _mm512_permutex_pd(X, M)					\
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)_mm512_undefined_pd(),				\
    (__mmask8)-1))

#define _mm512_mask_permutex_pd(W, U, X, M)				\
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M)				\
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)_mm512_setzero_pd(),				\
    (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I)					\
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),	\
    (int)(I),								\
    (__v8di)(__m512i)							\
    (_mm512_undefined_epi32 ()),					\
    (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I)				\
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),	\
    (int)(I),								\
    (__v8di)(__m512i)							\
    (_mm512_setzero_si512 ()),						\
    (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I)				\
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),	\
    (int)(I),								\
    (__v8di)(__m512i)(W),						\
    (__mmask8)(M)))
#endif
6594 extern __inline __m512i
6595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6598 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6599 (__v8di) __X,
6600 (__v8di)
6601 _mm512_setzero_si512 (),
6602 __M);
6605 extern __inline __m512i
6606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6607 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6609 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6610 (__v8di) __X,
6611 (__v8di)
6612 _mm512_undefined_epi32 (),
6613 (__mmask8) -1);
6616 extern __inline __m512i
6617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6618 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6619 __m512i __Y)
6621 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6622 (__v8di) __X,
6623 (__v8di) __W,
6624 __M);
6627 extern __inline __m512i
6628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6629 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6631 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6632 (__v16si) __X,
6633 (__v16si)
6634 _mm512_setzero_si512 (),
6635 __M);
6638 extern __inline __m512i
6639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6640 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6642 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6643 (__v16si) __X,
6644 (__v16si)
6645 _mm512_undefined_epi32 (),
6646 (__mmask16) -1);
6649 extern __inline __m512i
6650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6651 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6652 __m512i __Y)
6654 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6655 (__v16si) __X,
6656 (__v16si) __W,
6657 __M);
6660 extern __inline __m512d
6661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6662 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6664 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6665 (__v8di) __X,
6666 (__v8df)
6667 _mm512_undefined_pd (),
6668 (__mmask8) -1);
6671 extern __inline __m512d
6672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6673 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6675 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6676 (__v8di) __X,
6677 (__v8df) __W,
6678 (__mmask8) __U);
6681 extern __inline __m512d
6682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6683 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6685 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6686 (__v8di) __X,
6687 (__v8df)
6688 _mm512_setzero_pd (),
6689 (__mmask8) __U);
6692 extern __inline __m512
6693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6694 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6696 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6697 (__v16si) __X,
6698 (__v16sf)
6699 _mm512_undefined_ps (),
6700 (__mmask16) -1);
6703 extern __inline __m512
6704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6707 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6708 (__v16si) __X,
6709 (__v16sf) __W,
6710 (__mmask16) __U);
6713 extern __inline __m512
6714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6717 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6718 (__v16si) __X,
6719 (__v16sf)
6720 _mm512_setzero_ps (),
6721 (__mmask16) __U);
6724 #ifdef __OPTIMIZE__
6725 extern __inline __m512
6726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6727 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6729 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6730 (__v16sf) __V, __imm,
6731 (__v16sf)
6732 _mm512_undefined_ps (),
6733 (__mmask16) -1);
6736 extern __inline __m512
6737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6738 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6739 __m512 __V, const int __imm)
6741 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6742 (__v16sf) __V, __imm,
6743 (__v16sf) __W,
6744 (__mmask16) __U);
6747 extern __inline __m512
6748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6751 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6752 (__v16sf) __V, __imm,
6753 (__v16sf)
6754 _mm512_setzero_ps (),
6755 (__mmask16) __U);
6758 extern __inline __m512d
6759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6760 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6762 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6763 (__v8df) __V, __imm,
6764 (__v8df)
6765 _mm512_undefined_pd (),
6766 (__mmask8) -1);
6769 extern __inline __m512d
6770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6772 __m512d __V, const int __imm)
6774 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6775 (__v8df) __V, __imm,
6776 (__v8df) __W,
6777 (__mmask8) __U);
6780 extern __inline __m512d
6781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6783 const int __imm)
6785 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6786 (__v8df) __V, __imm,
6787 (__v8df)
6788 _mm512_setzero_pd (),
6789 (__mmask8) __U);
6792 extern __inline __m512d
6793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6794 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6795 const int __imm, const int __R)
6797 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6798 (__v8df) __B,
6799 (__v8di) __C,
6800 __imm,
6801 (__mmask8) -1, __R);
6804 extern __inline __m512d
6805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6806 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6807 __m512i __C, const int __imm, const int __R)
6809 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6810 (__v8df) __B,
6811 (__v8di) __C,
6812 __imm,
6813 (__mmask8) __U, __R);
6816 extern __inline __m512d
6817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6818 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6819 __m512i __C, const int __imm, const int __R)
6821 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6822 (__v8df) __B,
6823 (__v8di) __C,
6824 __imm,
6825 (__mmask8) __U, __R);
6828 extern __inline __m512
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6831 const int __imm, const int __R)
6833 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6834 (__v16sf) __B,
6835 (__v16si) __C,
6836 __imm,
6837 (__mmask16) -1, __R);
6840 extern __inline __m512
6841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6842 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6843 __m512i __C, const int __imm, const int __R)
6845 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6846 (__v16sf) __B,
6847 (__v16si) __C,
6848 __imm,
6849 (__mmask16) __U, __R);
6852 extern __inline __m512
6853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6854 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6855 __m512i __C, const int __imm, const int __R)
6857 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6858 (__v16sf) __B,
6859 (__v16si) __C,
6860 __imm,
6861 (__mmask16) __U, __R);
6864 extern __inline __m128d
6865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6866 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6867 const int __imm, const int __R)
6869 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6870 (__v2df) __B,
6871 (__v2di) __C, __imm,
6872 (__mmask8) -1, __R);
6875 extern __inline __m128d
6876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6877 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6878 __m128i __C, const int __imm, const int __R)
6880 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6881 (__v2df) __B,
6882 (__v2di) __C, __imm,
6883 (__mmask8) __U, __R);
6886 extern __inline __m128d
6887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6888 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6889 __m128i __C, const int __imm, const int __R)
6891 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6892 (__v2df) __B,
6893 (__v2di) __C,
6894 __imm,
6895 (__mmask8) __U, __R);
6898 extern __inline __m128
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6901 const int __imm, const int __R)
6903 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6904 (__v4sf) __B,
6905 (__v4si) __C, __imm,
6906 (__mmask8) -1, __R);
6909 extern __inline __m128
6910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6912 __m128i __C, const int __imm, const int __R)
6914 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6915 (__v4sf) __B,
6916 (__v4si) __C, __imm,
6917 (__mmask8) __U, __R);
6920 extern __inline __m128
6921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6923 __m128i __C, const int __imm, const int __R)
6925 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6926 (__v4sf) __B,
6927 (__v4si) __C, __imm,
6928 (__mmask8) __U, __R);
6931 #else
6932 #define _mm512_shuffle_pd(X, Y, C) \
6933 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6934 (__v8df)(__m512d)(Y), (int)(C),\
6935 (__v8df)(__m512d)_mm512_undefined_pd(),\
6936 (__mmask8)-1))
6938 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6939 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6940 (__v8df)(__m512d)(Y), (int)(C),\
6941 (__v8df)(__m512d)(W),\
6942 (__mmask8)(U)))
6944 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6945 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6946 (__v8df)(__m512d)(Y), (int)(C),\
6947 (__v8df)(__m512d)_mm512_setzero_pd(),\
6948 (__mmask8)(U)))
6950 #define _mm512_shuffle_ps(X, Y, C) \
6951 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6952 (__v16sf)(__m512)(Y), (int)(C),\
6953 (__v16sf)(__m512)_mm512_undefined_ps(),\
6954 (__mmask16)-1))
6956 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6957 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6958 (__v16sf)(__m512)(Y), (int)(C),\
6959 (__v16sf)(__m512)(W),\
6960 (__mmask16)(U)))
6962 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6963 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6964 (__v16sf)(__m512)(Y), (int)(C),\
6965 (__v16sf)(__m512)_mm512_setzero_ps(),\
6966 (__mmask16)(U)))
/* fixupimm with an explicit rounding/SAE operand R: each macro forwards
   operands X and Y, the fix-up table Z and the immediate C to the
   matching __builtin_ia32_fixupimm* builtin.  The _mask forms merge
   under mask U ((-1) selects every lane), the _maskz forms zero the
   masked-off lanes.  These are the non-__OPTIMIZE__ macro spellings;
   the #endif below closes that conditional block.  */
6968 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6969 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6970 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6971 (__mmask8)(-1), (R)))
6973 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6974 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6975 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6976 (__mmask8)(U), (R)))
6978 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6979 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6980 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6981 (__mmask8)(U), (R)))
6983 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6984 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6985 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6986 (__mmask16)(-1), (R)))
6988 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6989 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6990 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6991 (__mmask16)(U), (R)))
6993 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6994 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6995 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6996 (__mmask16)(U), (R)))
6998 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6999 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7000 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7001 (__mmask8)(-1), (R)))
7003 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
7004 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
7005 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7006 (__mmask8)(U), (R)))
7008 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
7009 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
7010 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
7011 (__mmask8)(U), (R)))
7013 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
7014 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7015 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7016 (__mmask8)(-1), (R)))
7018 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
7019 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
7020 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7021 (__mmask8)(U), (R)))
7023 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
7024 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
7025 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
7026 (__mmask8)(U), (R)))
7027 #endif
/* MOVSHDUP/MOVSLDUP wrappers: movehdup duplicates the odd-indexed,
   moveldup the even-indexed float elements (per the underlying
   builtins).  Plain forms pass an undefined vector and an all-ones
   mask; _mask forms merge into __W under __U; _maskz forms zero the
   masked-off lanes.  */
7029 extern __inline __m512
7030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7031 _mm512_movehdup_ps (__m512 __A)
7033 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7034 (__v16sf)
7035 _mm512_undefined_ps (),
7036 (__mmask16) -1);
7039 extern __inline __m512
7040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7041 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7043 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7044 (__v16sf) __W,
7045 (__mmask16) __U);
7048 extern __inline __m512
7049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
7052 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
7053 (__v16sf)
7054 _mm512_setzero_ps (),
7055 (__mmask16) __U);
7058 extern __inline __m512
7059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7060 _mm512_moveldup_ps (__m512 __A)
7062 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7063 (__v16sf)
7064 _mm512_undefined_ps (),
7065 (__mmask16) -1);
7068 extern __inline __m512
7069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
7072 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7073 (__v16sf) __W,
7074 (__mmask16) __U);
7077 extern __inline __m512
7078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7079 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
7081 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
7082 (__v16sf)
7083 _mm512_setzero_ps (),
7084 (__mmask16) __U);
/* Bitwise OR/XOR of 512-bit integer vectors.  Unmasked forms use
   plain C vector operators (on the unsigned element views) so the
   compiler can fold them freely; masked forms go through the
   pord/porq/pxord builtins, with 16-bit masks for dword lanes and
   8-bit masks for qword lanes.  */
7087 extern __inline __m512i
7088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7089 _mm512_or_si512 (__m512i __A, __m512i __B)
7091 return (__m512i) ((__v16su) __A | (__v16su) __B);
7094 extern __inline __m512i
7095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096 _mm512_or_epi32 (__m512i __A, __m512i __B)
7098 return (__m512i) ((__v16su) __A | (__v16su) __B);
7101 extern __inline __m512i
7102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7103 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7105 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7106 (__v16si) __B,
7107 (__v16si) __W,
7108 (__mmask16) __U);
7111 extern __inline __m512i
7112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7113 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7115 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
7116 (__v16si) __B,
7117 (__v16si)
7118 _mm512_setzero_si512 (),
7119 (__mmask16) __U);
7122 extern __inline __m512i
7123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7124 _mm512_or_epi64 (__m512i __A, __m512i __B)
7126 return (__m512i) ((__v8du) __A | (__v8du) __B);
7129 extern __inline __m512i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7133 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7134 (__v8di) __B,
7135 (__v8di) __W,
7136 (__mmask8) __U);
7139 extern __inline __m512i
7140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7141 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7143 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
7144 (__v8di) __B,
7145 (__v8di)
7146 _mm512_setzero_si512 (),
7147 (__mmask8) __U);
7150 extern __inline __m512i
7151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7152 _mm512_xor_si512 (__m512i __A, __m512i __B)
7154 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7157 extern __inline __m512i
7158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7159 _mm512_xor_epi32 (__m512i __A, __m512i __B)
7161 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
7164 extern __inline __m512i
7165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7166 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7168 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7169 (__v16si) __B,
7170 (__v16si) __W,
7171 (__mmask16) __U);
7174 extern __inline __m512i
7175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7178 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
7179 (__v16si) __B,
7180 (__v16si)
7181 _mm512_setzero_si512 (),
7182 (__mmask16) __U);
7185 extern __inline __m512i
7186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187 _mm512_xor_epi64 (__m512i __A, __m512i __B)
7189 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
7192 extern __inline __m512i
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7196 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7197 (__v8di) __B,
7198 (__v8di) __W,
7199 (__mmask8) __U);
7202 extern __inline __m512i
7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7204 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
7206 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
7207 (__v8di) __B,
7208 (__v8di)
7209 _mm512_setzero_si512 (),
7210 (__mmask8) __U);
/* Rotate left/right by an immediate count __B, on 32-bit (prold/prord,
   16-bit mask) and 64-bit (prolq/prorq, 8-bit mask) lanes.  Inline
   functions when __OPTIMIZE__ guarantees the count folds to a
   constant; otherwise equivalent macro forms so the builtin still
   receives an immediate.  */
7213 #ifdef __OPTIMIZE__
7214 extern __inline __m512i
7215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7216 _mm512_rol_epi32 (__m512i __A, const int __B)
7218 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7219 (__v16si)
7220 _mm512_undefined_epi32 (),
7221 (__mmask16) -1);
7224 extern __inline __m512i
7225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7226 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
7228 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7229 (__v16si) __W,
7230 (__mmask16) __U);
7233 extern __inline __m512i
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
7237 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
7238 (__v16si)
7239 _mm512_setzero_si512 (),
7240 (__mmask16) __U);
7243 extern __inline __m512i
7244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7245 _mm512_ror_epi32 (__m512i __A, int __B)
7247 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7248 (__v16si)
7249 _mm512_undefined_epi32 (),
7250 (__mmask16) -1);
7253 extern __inline __m512i
7254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7255 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
7257 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7258 (__v16si) __W,
7259 (__mmask16) __U);
7262 extern __inline __m512i
7263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7264 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
7266 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7267 (__v16si)
7268 _mm512_setzero_si512 (),
7269 (__mmask16) __U);
7272 extern __inline __m512i
7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274 _mm512_rol_epi64 (__m512i __A, const int __B)
7276 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7277 (__v8di)
7278 _mm512_undefined_epi32 (),
7279 (__mmask8) -1);
7282 extern __inline __m512i
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7286 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7287 (__v8di) __W,
7288 (__mmask8) __U);
7291 extern __inline __m512i
7292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7295 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7296 (__v8di)
7297 _mm512_setzero_si512 (),
7298 (__mmask8) __U);
7301 extern __inline __m512i
7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303 _mm512_ror_epi64 (__m512i __A, int __B)
7305 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7306 (__v8di)
7307 _mm512_undefined_epi32 (),
7308 (__mmask8) -1);
7311 extern __inline __m512i
7312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7313 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7315 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7316 (__v8di) __W,
7317 (__mmask8) __U);
7320 extern __inline __m512i
7321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7324 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7325 (__v8di)
7326 _mm512_setzero_si512 (),
7327 (__mmask8) __U);
/* Macro fallbacks for -O0: same builtins, argument casts spelled out.  */
7330 #else
7331 #define _mm512_rol_epi32(A, B) \
7332 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7333 (int)(B), \
7334 (__v16si)_mm512_undefined_epi32 (), \
7335 (__mmask16)(-1)))
7336 #define _mm512_mask_rol_epi32(W, U, A, B) \
7337 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7338 (int)(B), \
7339 (__v16si)(__m512i)(W), \
7340 (__mmask16)(U)))
7341 #define _mm512_maskz_rol_epi32(U, A, B) \
7342 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7343 (int)(B), \
7344 (__v16si)_mm512_setzero_si512 (), \
7345 (__mmask16)(U)))
7346 #define _mm512_ror_epi32(A, B) \
7347 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7348 (int)(B), \
7349 (__v16si)_mm512_undefined_epi32 (), \
7350 (__mmask16)(-1)))
7351 #define _mm512_mask_ror_epi32(W, U, A, B) \
7352 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7353 (int)(B), \
7354 (__v16si)(__m512i)(W), \
7355 (__mmask16)(U)))
7356 #define _mm512_maskz_ror_epi32(U, A, B) \
7357 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7358 (int)(B), \
7359 (__v16si)_mm512_setzero_si512 (), \
7360 (__mmask16)(U)))
7361 #define _mm512_rol_epi64(A, B) \
7362 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7363 (int)(B), \
7364 (__v8di)_mm512_undefined_epi32 (), \
7365 (__mmask8)(-1)))
7366 #define _mm512_mask_rol_epi64(W, U, A, B) \
7367 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7368 (int)(B), \
7369 (__v8di)(__m512i)(W), \
7370 (__mmask8)(U)))
7371 #define _mm512_maskz_rol_epi64(U, A, B) \
7372 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7373 (int)(B), \
7374 (__v8di)_mm512_setzero_si512 (), \
7375 (__mmask8)(U)))
7377 #define _mm512_ror_epi64(A, B) \
7378 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7379 (int)(B), \
7380 (__v8di)_mm512_undefined_epi32 (), \
7381 (__mmask8)(-1)))
7382 #define _mm512_mask_ror_epi64(W, U, A, B) \
7383 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7384 (int)(B), \
7385 (__v8di)(__m512i)(W), \
7386 (__mmask8)(U)))
7387 #define _mm512_maskz_ror_epi64(U, A, B) \
7388 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7389 (int)(B), \
7390 (__v8di)_mm512_setzero_si512 (), \
7391 (__mmask8)(U)))
7392 #endif
/* Bitwise AND of 512-bit integer vectors: C vector operators for the
   unmasked forms, pandd/pandq builtins for the masked forms.  */
7394 extern __inline __m512i
7395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7396 _mm512_and_si512 (__m512i __A, __m512i __B)
7398 return (__m512i) ((__v16su) __A & (__v16su) __B);
7401 extern __inline __m512i
7402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7403 _mm512_and_epi32 (__m512i __A, __m512i __B)
7405 return (__m512i) ((__v16su) __A & (__v16su) __B);
7408 extern __inline __m512i
7409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7410 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7412 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7413 (__v16si) __B,
7414 (__v16si) __W,
7415 (__mmask16) __U);
7418 extern __inline __m512i
7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7422 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7423 (__v16si) __B,
7424 (__v16si)
7425 _mm512_setzero_si512 (),
7426 (__mmask16) __U);
7429 extern __inline __m512i
7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431 _mm512_and_epi64 (__m512i __A, __m512i __B)
7433 return (__m512i) ((__v8du) __A & (__v8du) __B);
7436 extern __inline __m512i
7437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7438 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7440 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7441 (__v8di) __B,
7442 (__v8di) __W, __U);
7445 extern __inline __m512i
7446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7447 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7449 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7450 (__v8di) __B,
7451 (__v8di)
7452 _mm512_setzero_pd (),
7453 __U);
/* AND-NOT ((~A) & B) via the pandnd/pandnq builtins; plain forms use
   an all-ones mask with an undefined pass-through.  */
7456 extern __inline __m512i
7457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7460 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7461 (__v16si) __B,
7462 (__v16si)
7463 _mm512_undefined_epi32 (),
7464 (__mmask16) -1);
7467 extern __inline __m512i
7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7471 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7472 (__v16si) __B,
7473 (__v16si)
7474 _mm512_undefined_epi32 (),
7475 (__mmask16) -1);
7478 extern __inline __m512i
7479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7482 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7483 (__v16si) __B,
7484 (__v16si) __W,
7485 (__mmask16) __U);
7488 extern __inline __m512i
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7492 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7493 (__v16si) __B,
7494 (__v16si)
7495 _mm512_setzero_si512 (),
7496 (__mmask16) __U);
7499 extern __inline __m512i
7500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7503 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7504 (__v8di) __B,
7505 (__v8di)
7506 _mm512_undefined_epi32 (),
7507 (__mmask8) -1);
7510 extern __inline __m512i
7511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7512 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7514 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7515 (__v8di) __B,
7516 (__v8di) __W, __U);
7519 extern __inline __m512i
7520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7521 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7523 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7524 (__v8di) __B,
7525 (__v8di)
7526 _mm512_setzero_pd (),
7527 __U);
/* VPTESTM/VPTESTNM: per-lane test producing a mask.  test_* sets mask
   bit i when (__A & __B) lane i is nonzero, testn_* when it is zero;
   the _mask_ forms additionally AND the result with __U via the
   builtin's mask operand.  */
7530 extern __inline __mmask16
7531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7534 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7535 (__v16si) __B,
7536 (__mmask16) -1);
7539 extern __inline __mmask16
7540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7541 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7543 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7544 (__v16si) __B, __U);
7547 extern __inline __mmask8
7548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7549 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7551 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7552 (__v8di) __B,
7553 (__mmask8) -1);
7556 extern __inline __mmask8
7557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7558 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7560 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7563 extern __inline __mmask16
7564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7567 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7568 (__v16si) __B,
7569 (__mmask16) -1);
7572 extern __inline __mmask16
7573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7574 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7576 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7577 (__v16si) __B, __U);
7580 extern __inline __mmask8
7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7584 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7585 (__v8di) __B,
7586 (__mmask8) -1);
7589 extern __inline __mmask8
7590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7591 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7593 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7594 (__v8di) __B, __U);
/* |x| per float lane, implemented by clearing the sign bit with a
   32-bit AND mask (0x7fffffff); the _mask form merges into __W under
   __U through _mm512_mask_and_epi32.  */
7597 extern __inline __m512
7598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599 _mm512_abs_ps (__m512 __A)
7601 return (__m512) _mm512_and_epi32 ((__m512i) __A,
7602 _mm512_set1_epi32 (0x7fffffff));
7605 extern __inline __m512
7606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
7609 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
7610 _mm512_set1_epi32 (0x7fffffff));
7613 extern __inline __m512d
7614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615 _mm512_abs_pd (__m512 __A)
7617 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
7618 _mm512_set1_epi64 (0x7fffffffffffffffLL));
7621 extern __inline __m512d
7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512 __A)
7625 return (__m512d)
7626 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
7627 _mm512_set1_epi64 (0x7fffffffffffffffLL));
/* PUNPCKH/PUNPCKL interleave of 32- and 64-bit integer lanes, in the
   usual plain/_mask/_maskz triples over the punpck*512 builtins.  */
7630 extern __inline __m512i
7631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7632 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7634 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7635 (__v16si) __B,
7636 (__v16si)
7637 _mm512_undefined_epi32 (),
7638 (__mmask16) -1);
7641 extern __inline __m512i
7642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7643 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7644 __m512i __B)
7646 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7647 (__v16si) __B,
7648 (__v16si) __W,
7649 (__mmask16) __U);
7652 extern __inline __m512i
7653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7654 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7656 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7657 (__v16si) __B,
7658 (__v16si)
7659 _mm512_setzero_si512 (),
7660 (__mmask16) __U);
7663 extern __inline __m512i
7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7667 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7668 (__v8di) __B,
7669 (__v8di)
7670 _mm512_undefined_epi32 (),
7671 (__mmask8) -1);
7674 extern __inline __m512i
7675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7678 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7679 (__v8di) __B,
7680 (__v8di) __W,
7681 (__mmask8) __U);
7684 extern __inline __m512i
7685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7688 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7689 (__v8di) __B,
7690 (__v8di)
7691 _mm512_setzero_si512 (),
7692 (__mmask8) __U);
7695 extern __inline __m512i
7696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7697 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7699 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7700 (__v16si) __B,
7701 (__v16si)
7702 _mm512_undefined_epi32 (),
7703 (__mmask16) -1);
7706 extern __inline __m512i
7707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7709 __m512i __B)
7711 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7712 (__v16si) __B,
7713 (__v16si) __W,
7714 (__mmask16) __U);
7717 extern __inline __m512i
7718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7721 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7722 (__v16si) __B,
7723 (__v16si)
7724 _mm512_setzero_si512 (),
7725 (__mmask16) __U);
7728 extern __inline __m512i
7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7732 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7733 (__v8di) __B,
7734 (__v8di)
7735 _mm512_undefined_epi32 (),
7736 (__mmask8) -1);
7739 extern __inline __m512i
7740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7743 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7744 (__v8di) __B,
7745 (__v8di) __W,
7746 (__mmask8) __U);
7749 extern __inline __m512i
7750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7753 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7754 (__v8di) __B,
7755 (__v8di)
7756 _mm512_setzero_si512 (),
7757 (__mmask8) __U);
/* Scalar float/double -> integer conversions with an explicit rounding
   operand __R: cvt_* rounds per __R, cvtt_* truncates.  64-bit result
   variants are gated on __x86_64__; each group has inline-function
   forms under __OPTIMIZE__ and macro forms otherwise, so the rounding
   immediate reaches the builtin as a constant in both cases.  _si and
   _i variants are aliases calling the same builtin.  */
7760 #ifdef __x86_64__
7761 #ifdef __OPTIMIZE__
7762 extern __inline unsigned long long
7763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7764 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7766 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7769 extern __inline long long
7770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7771 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7773 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7776 extern __inline long long
7777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7778 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7780 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7783 extern __inline unsigned long long
7784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7785 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7787 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7790 extern __inline long long
7791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7792 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7794 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7797 extern __inline long long
7798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7799 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7801 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7803 #else
7804 #define _mm_cvt_roundss_u64(A, B) \
7805 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7807 #define _mm_cvt_roundss_si64(A, B) \
7808 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7810 #define _mm_cvt_roundss_i64(A, B) \
7811 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7813 #define _mm_cvtt_roundss_u64(A, B) \
7814 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7816 #define _mm_cvtt_roundss_i64(A, B) \
7817 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7819 #define _mm_cvtt_roundss_si64(A, B) \
7820 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7821 #endif
7822 #endif
7824 #ifdef __OPTIMIZE__
7825 extern __inline unsigned
7826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7829 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7832 extern __inline int
7833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7834 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7836 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7839 extern __inline int
7840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7843 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7846 extern __inline unsigned
7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7850 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7853 extern __inline int
7854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7855 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7857 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7860 extern __inline int
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7864 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7866 #else
7867 #define _mm_cvt_roundss_u32(A, B) \
7868 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7870 #define _mm_cvt_roundss_si32(A, B) \
7871 ((int)__builtin_ia32_vcvtss2si32(A, B))
7873 #define _mm_cvt_roundss_i32(A, B) \
7874 ((int)__builtin_ia32_vcvtss2si32(A, B))
7876 #define _mm_cvtt_roundss_u32(A, B) \
7877 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7879 #define _mm_cvtt_roundss_si32(A, B) \
7880 ((int)__builtin_ia32_vcvttss2si32(A, B))
7882 #define _mm_cvtt_roundss_i32(A, B) \
7883 ((int)__builtin_ia32_vcvttss2si32(A, B))
7884 #endif
7886 #ifdef __x86_64__
7887 #ifdef __OPTIMIZE__
7888 extern __inline unsigned long long
7889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7892 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7895 extern __inline long long
7896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7897 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7899 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7902 extern __inline long long
7903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7904 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7906 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7909 extern __inline unsigned long long
7910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7911 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7913 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7916 extern __inline long long
7917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7918 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7920 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7923 extern __inline long long
7924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7925 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7927 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7929 #else
7930 #define _mm_cvt_roundsd_u64(A, B) \
7931 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7933 #define _mm_cvt_roundsd_si64(A, B) \
7934 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7936 #define _mm_cvt_roundsd_i64(A, B) \
7937 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7939 #define _mm_cvtt_roundsd_u64(A, B) \
7940 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7942 #define _mm_cvtt_roundsd_si64(A, B) \
7943 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7945 #define _mm_cvtt_roundsd_i64(A, B) \
7946 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7947 #endif
7948 #endif
7950 #ifdef __OPTIMIZE__
7951 extern __inline unsigned
7952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7955 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7958 extern __inline int
7959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7960 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7962 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7965 extern __inline int
7966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7967 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7969 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7972 extern __inline unsigned
7973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7976 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7979 extern __inline int
7980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7981 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7983 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7986 extern __inline int
7987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7988 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7990 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7992 #else
7993 #define _mm_cvt_roundsd_u32(A, B) \
7994 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7996 #define _mm_cvt_roundsd_si32(A, B) \
7997 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7999 #define _mm_cvt_roundsd_i32(A, B) \
8000 ((int)__builtin_ia32_vcvtsd2si32(A, B))
8002 #define _mm_cvtt_roundsd_u32(A, B) \
8003 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
8005 #define _mm_cvtt_roundsd_si32(A, B) \
8006 ((int)__builtin_ia32_vcvttsd2si32(A, B))
8008 #define _mm_cvtt_roundsd_i32(A, B) \
8009 ((int)__builtin_ia32_vcvttsd2si32(A, B))
8010 #endif
/* VMOVDDUP on a 512-bit double vector, via the movddup512 builtin.
   The three variants differ only in the merge destination passed to the
   builtin: undefined (unmasked, mask -1), __W (merge-masked), or zero
   (zero-masked).  */
8012 extern __inline __m512d
8013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8014 _mm512_movedup_pd (__m512d __A)
8016 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8017 (__v8df)
8018 _mm512_undefined_pd (),
8019 (__mmask8) -1);
8022 extern __inline __m512d
8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
8026 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8027 (__v8df) __W,
8028 (__mmask8) __U);
8031 extern __inline __m512d
8032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8033 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
8035 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
8036 (__v8df)
8037 _mm512_setzero_pd (),
8038 (__mmask8) __U);
/* VUNPCKLPD / VUNPCKHPD on 512-bit double vectors, each as the usual
   unmasked / merge-masked / zero-masked triple over the corresponding
   *_mask builtin (operands: A, B, merge destination, write mask).  */
8041 extern __inline __m512d
8042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8043 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
8045 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8046 (__v8df) __B,
8047 (__v8df)
8048 _mm512_undefined_pd (),
8049 (__mmask8) -1);
8052 extern __inline __m512d
8053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8056 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8057 (__v8df) __B,
8058 (__v8df) __W,
8059 (__mmask8) __U);
8062 extern __inline __m512d
8063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8064 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
8066 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
8067 (__v8df) __B,
8068 (__v8df)
8069 _mm512_setzero_pd (),
8070 (__mmask8) __U);
8073 extern __inline __m512d
8074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8075 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
8077 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8078 (__v8df) __B,
8079 (__v8df)
8080 _mm512_undefined_pd (),
8081 (__mmask8) -1);
8084 extern __inline __m512d
8085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8086 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
8088 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8089 (__v8df) __B,
8090 (__v8df) __W,
8091 (__mmask8) __U);
8094 extern __inline __m512d
8095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
8098 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
8099 (__v8df) __B,
8100 (__v8df)
8101 _mm512_setzero_pd (),
8102 (__mmask8) __U);
/* VUNPCKHPS on 512-bit float vectors: unmasked / merge-masked /
   zero-masked triple over the unpckhps512_mask builtin.  */
8105 extern __inline __m512
8106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8107 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
8109 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8110 (__v16sf) __B,
8111 (__v16sf)
8112 _mm512_undefined_ps (),
8113 (__mmask16) -1);
8116 extern __inline __m512
8117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8118 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
8120 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8121 (__v16sf) __B,
8122 (__v16sf) __W,
8123 (__mmask16) __U);
8126 extern __inline __m512
8127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8128 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
8130 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
8131 (__v16sf) __B,
8132 (__v16sf)
8133 _mm512_setzero_ps (),
8134 (__mmask16) __U);
8137 #ifdef __OPTIMIZE__
/* Widening / half-precision conversions with explicit rounding (__R)
   or an fp16 immediate (__I): ps->pd, ph->ps, and ps->ph.  Inline
   forms under __OPTIMIZE__, macro forms in the #else so the
   rounding/immediate operand stays a literal constant.  */
8138 extern __inline __m512d
8139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8140 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
8142 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8143 (__v8df)
8144 _mm512_undefined_pd (),
8145 (__mmask8) -1, __R);
8148 extern __inline __m512d
8149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8150 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
8151 const int __R)
8153 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8154 (__v8df) __W,
8155 (__mmask8) __U, __R);
8158 extern __inline __m512d
8159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8160 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
8162 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
8163 (__v8df)
8164 _mm512_setzero_pd (),
8165 (__mmask8) __U, __R);
8168 extern __inline __m512
8169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
8172 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8173 (__v16sf)
8174 _mm512_undefined_ps (),
8175 (__mmask16) -1, __R);
8178 extern __inline __m512
8179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
8181 const int __R)
8183 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8184 (__v16sf) __W,
8185 (__mmask16) __U, __R);
8188 extern __inline __m512
8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8190 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
8192 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
8193 (__v16sf)
8194 _mm512_setzero_ps (),
8195 (__mmask16) __U, __R);
/* ps->ph: _mm512_cvt_roundps_ph and _mm512_cvtps_ph are identical
   wrappers over vcvtps2ph512_mask (likewise their mask/maskz pairs);
   __I is the VCVTPS2PH immediate.  */
8198 extern __inline __m256i
8199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8200 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
8202 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8203 __I,
8204 (__v16hi)
8205 _mm256_undefined_si256 (),
8206 -1);
8209 extern __inline __m256i
8210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8211 _mm512_cvtps_ph (__m512 __A, const int __I)
8213 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8214 __I,
8215 (__v16hi)
8216 _mm256_undefined_si256 (),
8217 -1);
8220 extern __inline __m256i
8221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8222 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
8223 const int __I)
8225 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8226 __I,
8227 (__v16hi) __U,
8228 (__mmask16) __W);
8231 extern __inline __m256i
8232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8233 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
8235 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8236 __I,
8237 (__v16hi) __U,
8238 (__mmask16) __W);
8241 extern __inline __m256i
8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8243 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
8245 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8246 __I,
8247 (__v16hi)
8248 _mm256_setzero_si256 (),
8249 (__mmask16) __W);
8252 extern __inline __m256i
8253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8254 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
8256 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
8257 __I,
8258 (__v16hi)
8259 _mm256_setzero_si256 (),
8260 (__mmask16) __W);
8262 #else
/* NOTE(review): unlike the inline forms, the cvtps2pd/vcvtph2ps macros
   below pass A/W without an explicit source-vector cast in some spots;
   this relies on vector-type compatibility of __m512/__v8sf etc. —
   confirm against the builtin prototypes before touching.  */
8263 #define _mm512_cvt_roundps_pd(A, B) \
8264 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
8266 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
8267 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
8269 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
8270 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
8272 #define _mm512_cvt_roundph_ps(A, B) \
8273 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
8275 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
8276 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
8278 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
8279 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
8281 #define _mm512_cvt_roundps_ph(A, I) \
8282 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8283 (__v16hi)_mm256_undefined_si256 (), -1))
8284 #define _mm512_cvtps_ph(A, I) \
8285 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8286 (__v16hi)_mm256_undefined_si256 (), -1))
8287 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
8288 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8289 (__v16hi)(__m256i)(U), (__mmask16) (W)))
8290 #define _mm512_mask_cvtps_ph(U, W, A, I) \
8291 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8292 (__v16hi)(__m256i)(U), (__mmask16) (W)))
8293 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
8294 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8295 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8296 #define _mm512_maskz_cvtps_ph(W, A, I) \
8297 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
8298 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
8299 #endif
8301 #ifdef __OPTIMIZE__
/* Narrowing conversions with explicit rounding: pd->ps (512->256) and
   the scalar sd<->ss pair.  Same inline-vs-macro split as above.  */
8302 extern __inline __m256
8303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8306 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8307 (__v8sf)
8308 _mm256_undefined_ps (),
8309 (__mmask8) -1, __R);
8312 extern __inline __m256
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8315 const int __R)
8317 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8318 (__v8sf) __W,
8319 (__mmask8) __U, __R);
8322 extern __inline __m256
8323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8326 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8327 (__v8sf)
8328 _mm256_setzero_ps (),
8329 (__mmask8) __U, __R);
8332 extern __inline __m128
8333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8336 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8337 (__v2df) __B,
8338 __R);
8341 extern __inline __m128d
8342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8343 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8345 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8346 (__v4sf) __B,
8347 __R);
8349 #else
8350 #define _mm512_cvt_roundpd_ps(A, B) \
8351 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
8353 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
8354 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
8356 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
8357 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
8359 #define _mm_cvt_roundsd_ss(A, B, C) \
8360 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
8362 #define _mm_cvt_roundss_sd(A, B, C) \
8363 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
8364 #endif
/* 512-bit non-temporal (streaming) stores and load, via the
   movnt{dq,ps,pd}512 / movntdqa512 builtins.  */
8366 extern __inline void
8367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8368 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8370 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8373 extern __inline void
8374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8375 _mm512_stream_ps (float *__P, __m512 __A)
8377 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8380 extern __inline void
8381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8382 _mm512_stream_pd (double *__P, __m512d __A)
8384 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8387 extern __inline __m512i
8388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8389 _mm512_stream_load_si512 (void *__P)
8391 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8394 /* Constants for mantissa extraction */
/* Normalization-interval selector for the getmant intrinsics; occupies
   the low two bits of the builtin immediate (packed as (sign << 2) | norm
   by the getmant wrappers below).  */
8395 typedef enum
8397 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8398 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8399 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8400 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8401 } _MM_MANTISSA_NORM_ENUM;
/* Sign-control selector for the getmant intrinsics; occupies bits 3:2
   of the builtin immediate.  */
8403 typedef enum
8405 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8406 _MM_MANT_SIGN_zero, /* sign = 0 */
8407 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8408 } _MM_MANTISSA_SIGN_ENUM;
8410 #ifdef __OPTIMIZE__
/* getexp family with explicit rounding/SAE (__R): scalar ss/sd forms
   (unmasked forms use the *_round builtin, masked forms the
   *_mask_round builtin) and 512-bit packed ps/pd forms over the
   getexpps512/getexppd512 mask builtins.  */
8411 extern __inline __m128
8412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8413 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8415 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8416 (__v4sf) __B,
8417 __R);
8420 extern __inline __m128
8421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8423 __m128 __B, const int __R)
8425 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8426 (__v4sf) __B,
8427 (__v4sf) __W,
8428 (__mmask8) __U, __R);
8431 extern __inline __m128
8432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8433 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8434 const int __R)
8436 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
8437 (__v4sf) __B,
8438 (__v4sf)
8439 _mm_setzero_ps (),
8440 (__mmask8) __U, __R);
8443 extern __inline __m128d
8444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8445 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8447 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8448 (__v2df) __B,
8449 __R);
8452 extern __inline __m128d
8453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8454 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8455 __m128d __B, const int __R)
8457 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8458 (__v2df) __B,
8459 (__v2df) __W,
8460 (__mmask8) __U, __R);
8463 extern __inline __m128d
8464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8466 const int __R)
8468 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
8469 (__v2df) __B,
8470 (__v2df)
8471 _mm_setzero_pd (),
8472 (__mmask8) __U, __R);
8475 extern __inline __m512
8476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8477 _mm512_getexp_round_ps (__m512 __A, const int __R)
8479 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8480 (__v16sf)
8481 _mm512_undefined_ps (),
8482 (__mmask16) -1, __R);
8485 extern __inline __m512
8486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8487 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8488 const int __R)
8490 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8491 (__v16sf) __W,
8492 (__mmask16) __U, __R);
8495 extern __inline __m512
8496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8497 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8499 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8500 (__v16sf)
8501 _mm512_setzero_ps (),
8502 (__mmask16) __U, __R);
8505 extern __inline __m512d
8506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8507 _mm512_getexp_round_pd (__m512d __A, const int __R)
8509 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8510 (__v8df)
8511 _mm512_undefined_pd (),
8512 (__mmask8) -1, __R);
8515 extern __inline __m512d
8516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8517 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8518 const int __R)
8520 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8521 (__v8df) __W,
8522 (__mmask8) __U, __R);
8525 extern __inline __m512d
8526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8527 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8529 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8530 (__v8df)
8531 _mm512_setzero_pd (),
8532 (__mmask8) __U, __R);
/* getmant family with explicit rounding/SAE.  The interval selector
   (__B/__C, _MM_MANTISSA_NORM_ENUM) and sign selector (__C/__D,
   _MM_MANTISSA_SIGN_ENUM) are packed into one builtin immediate as
   (sign << 2) | norm.  Packed pd/ps forms use the getmant*512_mask
   builtins; scalar sd/ss forms use *_round (unmasked) or
   *_mask_round (masked) builtins.  */
8535 extern __inline __m512d
8536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8537 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8538 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8540 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8541 (__C << 2) | __B,
8542 _mm512_undefined_pd (),
8543 (__mmask8) -1, __R);
8546 extern __inline __m512d
8547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8548 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8549 _MM_MANTISSA_NORM_ENUM __B,
8550 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8552 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8553 (__C << 2) | __B,
8554 (__v8df) __W, __U,
8555 __R);
8558 extern __inline __m512d
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8561 _MM_MANTISSA_NORM_ENUM __B,
8562 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8564 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8565 (__C << 2) | __B,
8566 (__v8df)
8567 _mm512_setzero_pd (),
8568 __U, __R);
8571 extern __inline __m512
8572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8573 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8574 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8576 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8577 (__C << 2) | __B,
8578 _mm512_undefined_ps (),
8579 (__mmask16) -1, __R);
8582 extern __inline __m512
8583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8584 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8585 _MM_MANTISSA_NORM_ENUM __B,
8586 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8588 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8589 (__C << 2) | __B,
8590 (__v16sf) __W, __U,
8591 __R);
8594 extern __inline __m512
8595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8596 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8597 _MM_MANTISSA_NORM_ENUM __B,
8598 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8600 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8601 (__C << 2) | __B,
8602 (__v16sf)
8603 _mm512_setzero_ps (),
8604 __U, __R);
8607 extern __inline __m128d
8608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8609 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8610 _MM_MANTISSA_NORM_ENUM __C,
8611 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8613 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8614 (__v2df) __B,
8615 (__D << 2) | __C,
8616 __R);
8619 extern __inline __m128d
8620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
8622 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
8623 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8625 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8626 (__v2df) __B,
8627 (__D << 2) | __C,
8628 (__v2df) __W,
8629 __U, __R);
8632 extern __inline __m128d
8633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8634 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
8635 _MM_MANTISSA_NORM_ENUM __C,
8636 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8638 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
8639 (__v2df) __B,
8640 (__D << 2) | __C,
8641 (__v2df)
8642 _mm_setzero_pd(),
8643 __U, __R);
8646 extern __inline __m128
8647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8648 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8649 _MM_MANTISSA_NORM_ENUM __C,
8650 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8652 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8653 (__v4sf) __B,
8654 (__D << 2) | __C,
8655 __R);
8658 extern __inline __m128
8659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8660 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
8661 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
8662 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8664 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8665 (__v4sf) __B,
8666 (__D << 2) | __C,
8667 (__v4sf) __W,
8668 __U, __R);
8671 extern __inline __m128
8672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8673 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
8674 _MM_MANTISSA_NORM_ENUM __C,
8675 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8677 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
8678 (__v4sf) __B,
8679 (__D << 2) | __C,
8680 (__v4sf)
8681 _mm_setzero_ps(),
8682 __U, __R);
8685 #else
/* Non-__OPTIMIZE__ macro forms of the getmant/getexp family above.
   Same (sign << 2) | norm immediate packing, spelled with fully
   parenthesized and cast arguments.  */
8686 #define _mm512_getmant_round_pd(X, B, C, R) \
8687 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8688 (int)(((C)<<2) | (B)), \
8689 (__v8df)(__m512d)_mm512_undefined_pd(), \
8690 (__mmask8)-1,\
8691 (R)))
8693 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8694 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8695 (int)(((C)<<2) | (B)), \
8696 (__v8df)(__m512d)(W), \
8697 (__mmask8)(U),\
8698 (R)))
8700 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8701 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8702 (int)(((C)<<2) | (B)), \
8703 (__v8df)(__m512d)_mm512_setzero_pd(), \
8704 (__mmask8)(U),\
8705 (R)))
8706 #define _mm512_getmant_round_ps(X, B, C, R) \
8707 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8708 (int)(((C)<<2) | (B)), \
8709 (__v16sf)(__m512)_mm512_undefined_ps(), \
8710 (__mmask16)-1,\
8711 (R)))
8713 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8714 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8715 (int)(((C)<<2) | (B)), \
8716 (__v16sf)(__m512)(W), \
8717 (__mmask16)(U),\
8718 (R)))
8720 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8721 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8722 (int)(((C)<<2) | (B)), \
8723 (__v16sf)(__m512)_mm512_setzero_ps(), \
8724 (__mmask16)(U),\
8725 (R)))
8726 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8727 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8728 (__v2df)(__m128d)(Y), \
8729 (int)(((D)<<2) | (C)), \
8730 (R)))
8732 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
8733 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8734 (__v2df)(__m128d)(Y), \
8735 (int)(((D)<<2) | (C)), \
8736 (__v2df)(__m128d)(W), \
8737 (__mmask8)(U),\
8738 (R)))
8740 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
8741 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
8742 (__v2df)(__m128d)(Y), \
8743 (int)(((D)<<2) | (C)), \
8744 (__v2df)(__m128d)_mm_setzero_pd(), \
8745 (__mmask8)(U),\
8746 (R)))
8748 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8749 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8750 (__v4sf)(__m128)(Y), \
8751 (int)(((D)<<2) | (C)), \
8752 (R)))
8754 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
8755 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
8756 (__v4sf)(__m128)(Y), \
8757 (int)(((D)<<2) | (C)), \
8758 (__v4sf)(__m128)(W), \
8759 (__mmask8)(U),\
8760 (R)))
8762 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
8763 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
8764 (__v4sf)(__m128)(Y), \
8765 (int)(((D)<<2) | (C)), \
8766 (__v4sf)(__m128)_mm_setzero_ps(), \
8767 (__mmask8)(U),\
8768 (R)))
8770 #define _mm_getexp_round_ss(A, B, R) \
8771 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8773 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
8774 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
8776 #define _mm_maskz_getexp_round_ss(U, A, B, C) \
8777 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
8779 #define _mm_getexp_round_sd(A, B, R) \
8780 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8782 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
8783 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
8785 #define _mm_maskz_getexp_round_sd(U, A, B, C) \
8786 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
8789 #define _mm512_getexp_round_ps(A, R) \
8790 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8791 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8793 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8794 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8795 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8797 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8798 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8799 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8801 #define _mm512_getexp_round_pd(A, R) \
8802 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8803 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8805 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8806 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8807 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8809 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8810 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8811 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8812 #endif
8814 #ifdef __OPTIMIZE__
/* roundscale family with explicit rounding/SAE: VRNDSCALEPS/PD with an
   immediate __imm, plus scalar ss/sd forms.  Note the masked packed
   variants use the (dst, mask, src) parameter order and pass __A as
   the merge destination.  */
8815 extern __inline __m512
8816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8817 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8819 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8820 (__v16sf)
8821 _mm512_undefined_ps (),
8822 -1, __R);
8825 extern __inline __m512
8826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8827 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8828 const int __imm, const int __R)
8830 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8831 (__v16sf) __A,
8832 (__mmask16) __B, __R);
8835 extern __inline __m512
8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8838 const int __imm, const int __R)
8840 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8841 __imm,
8842 (__v16sf)
8843 _mm512_setzero_ps (),
8844 (__mmask16) __A, __R);
8847 extern __inline __m512d
8848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8849 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8851 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8852 (__v8df)
8853 _mm512_undefined_pd (),
8854 -1, __R);
8857 extern __inline __m512d
8858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8859 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8860 __m512d __C, const int __imm, const int __R)
8862 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8863 (__v8df) __A,
8864 (__mmask8) __B, __R);
8867 extern __inline __m512d
8868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8869 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8870 const int __imm, const int __R)
8872 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8873 __imm,
8874 (__v8df)
8875 _mm512_setzero_pd (),
8876 (__mmask8) __A, __R);
8879 extern __inline __m128
8880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8881 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8883 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8884 (__v4sf) __B, __imm, __R);
8887 extern __inline __m128d
8888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8889 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8890 const int __R)
8892 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8893 (__v2df) __B, __imm, __R);
8896 #else
8897 #define _mm512_roundscale_round_ps(A, B, R) \
8898 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8899 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
8900 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8901 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8902 (int)(D), \
8903 (__v16sf)(__m512)(A), \
8904 (__mmask16)(B), R))
8905 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8906 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8907 (int)(C), \
8908 (__v16sf)_mm512_setzero_ps(),\
8909 (__mmask16)(A), R))
8910 #define _mm512_roundscale_round_pd(A, B, R) \
8911 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8912 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
8913 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8914 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8915 (int)(D), \
8916 (__v8df)(__m512d)(A), \
8917 (__mmask8)(B), R))
8918 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8919 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8920 (int)(C), \
8921 (__v8df)_mm512_setzero_pd(),\
8922 (__mmask8)(A), R))
8923 #define _mm_roundscale_round_ss(A, B, C, R) \
8924 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8925 (__v4sf)(__m128)(B), (int)(C), R))
8926 #define _mm_roundscale_round_sd(A, B, C, R) \
8927 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8928 (__v2df)(__m128d)(B), (int)(C), R))
8929 #endif
/* floor/ceil: fixed-immediate uses of the rndscale builtins with
   _MM_FROUND_FLOOR / _MM_FROUND_CEIL and _MM_FROUND_CUR_DIRECTION.
   The unmasked forms pass __A itself as the (ignored, mask -1) merge
   destination.  */
8931 extern __inline __m512
8932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8933 _mm512_floor_ps (__m512 __A)
8935 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8936 _MM_FROUND_FLOOR,
8937 (__v16sf) __A, -1,
8938 _MM_FROUND_CUR_DIRECTION);
8941 extern __inline __m512d
8942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943 _mm512_floor_pd (__m512d __A)
8945 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8946 _MM_FROUND_FLOOR,
8947 (__v8df) __A, -1,
8948 _MM_FROUND_CUR_DIRECTION);
8951 extern __inline __m512
8952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8953 _mm512_ceil_ps (__m512 __A)
8955 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8956 _MM_FROUND_CEIL,
8957 (__v16sf) __A, -1,
8958 _MM_FROUND_CUR_DIRECTION);
8961 extern __inline __m512d
8962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8963 _mm512_ceil_pd (__m512d __A)
8965 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8966 _MM_FROUND_CEIL,
8967 (__v8df) __A, -1,
8968 _MM_FROUND_CUR_DIRECTION);
8971 extern __inline __m512
8972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8973 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8975 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8976 _MM_FROUND_FLOOR,
8977 (__v16sf) __W, __U,
8978 _MM_FROUND_CUR_DIRECTION);
8981 extern __inline __m512d
8982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8983 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8985 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8986 _MM_FROUND_FLOOR,
8987 (__v8df) __W, __U,
8988 _MM_FROUND_CUR_DIRECTION);
8991 extern __inline __m512
8992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8995 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8996 _MM_FROUND_CEIL,
8997 (__v16sf) __W, __U,
8998 _MM_FROUND_CUR_DIRECTION);
9001 extern __inline __m512d
9002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9003 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
9005 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
9006 _MM_FROUND_CEIL,
9007 (__v8df) __W, __U,
9008 _MM_FROUND_CUR_DIRECTION);
9011 #ifdef __OPTIMIZE__
/* VALIGND / VALIGNQ: lane-crossing element-granular alignr on 32-bit
   (epi32, 16-bit mask) and 64-bit (epi64, 8-bit mask) elements, each
   as the usual unmasked / merge-masked / zero-masked triple.  */
9012 extern __inline __m512i
9013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9014 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
9016 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9017 (__v16si) __B, __imm,
9018 (__v16si)
9019 _mm512_undefined_epi32 (),
9020 (__mmask16) -1);
9023 extern __inline __m512i
9024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9025 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
9026 __m512i __B, const int __imm)
9028 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9029 (__v16si) __B, __imm,
9030 (__v16si) __W,
9031 (__mmask16) __U);
9034 extern __inline __m512i
9035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9036 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
9037 const int __imm)
9039 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
9040 (__v16si) __B, __imm,
9041 (__v16si)
9042 _mm512_setzero_si512 (),
9043 (__mmask16) __U);
9046 extern __inline __m512i
9047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9048 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
9050 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9051 (__v8di) __B, __imm,
9052 (__v8di)
9053 _mm512_undefined_epi32 (),
9054 (__mmask8) -1);
9057 extern __inline __m512i
9058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9059 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
9060 __m512i __B, const int __imm)
9062 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9063 (__v8di) __B, __imm,
9064 (__v8di) __W,
9065 (__mmask8) __U);
9068 extern __inline __m512i
9069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
9071 const int __imm)
9073 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
9074 (__v8di) __B, __imm,
9075 (__v8di)
9076 _mm512_setzero_si512 (),
9077 (__mmask8) __U);
9079 #else
9080 #define _mm512_alignr_epi32(X, Y, C) \
9081 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9082 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
9083 (__mmask16)-1))
9085 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
9086 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9087 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
9088 (__mmask16)(U)))
9090 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
9091 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
9092 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
9093 (__mmask16)(U)))
9095 #define _mm512_alignr_epi64(X, Y, C) \
9096 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9097 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
9098 (__mmask8)-1))
9100 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
9101 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9102 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
9104 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
9105 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
9106 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
9107 (__mmask8)(U)))
9108 #endif
9110 extern __inline __mmask16
9111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9112 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
9114 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9115 (__v16si) __B,
9116 (__mmask16) -1);
9119 extern __inline __mmask16
9120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9123 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
9124 (__v16si) __B, __U);
9127 extern __inline __mmask8
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9131 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9132 (__v8di) __B, __U);
9135 extern __inline __mmask8
9136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9137 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
9139 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
9140 (__v8di) __B,
9141 (__mmask8) -1);
9144 extern __inline __mmask16
9145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9146 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
9148 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9149 (__v16si) __B,
9150 (__mmask16) -1);
9153 extern __inline __mmask16
9154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9155 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
9157 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
9158 (__v16si) __B, __U);
9161 extern __inline __mmask8
9162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9163 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
9165 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9166 (__v8di) __B, __U);
9169 extern __inline __mmask8
9170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9171 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
9173 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
9174 (__v8di) __B,
9175 (__mmask8) -1);
9178 extern __inline __mmask16
9179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9180 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
9182 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9183 (__v16si) __Y, 5,
9184 (__mmask16) -1);
9187 extern __inline __mmask16
9188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9189 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9191 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9192 (__v16si) __Y, 5,
9193 (__mmask16) __M);
9196 extern __inline __mmask16
9197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9198 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9200 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9201 (__v16si) __Y, 5,
9202 (__mmask16) __M);
9205 extern __inline __mmask16
9206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9207 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
9209 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9210 (__v16si) __Y, 5,
9211 (__mmask16) -1);
9214 extern __inline __mmask8
9215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9216 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9218 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9219 (__v8di) __Y, 5,
9220 (__mmask8) __M);
9223 extern __inline __mmask8
9224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
9227 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9228 (__v8di) __Y, 5,
9229 (__mmask8) -1);
9232 extern __inline __mmask8
9233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9234 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9236 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9237 (__v8di) __Y, 5,
9238 (__mmask8) __M);
9241 extern __inline __mmask8
9242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9243 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
9245 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9246 (__v8di) __Y, 5,
9247 (__mmask8) -1);
9250 extern __inline __mmask16
9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9254 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9255 (__v16si) __Y, 2,
9256 (__mmask16) __M);
9259 extern __inline __mmask16
9260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9261 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
9263 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9264 (__v16si) __Y, 2,
9265 (__mmask16) -1);
9268 extern __inline __mmask16
9269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9270 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9272 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9273 (__v16si) __Y, 2,
9274 (__mmask16) __M);
9277 extern __inline __mmask16
9278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9279 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
9281 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9282 (__v16si) __Y, 2,
9283 (__mmask16) -1);
9286 extern __inline __mmask8
9287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9288 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9290 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9291 (__v8di) __Y, 2,
9292 (__mmask8) __M);
9295 extern __inline __mmask8
9296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9297 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
9299 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9300 (__v8di) __Y, 2,
9301 (__mmask8) -1);
9304 extern __inline __mmask8
9305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9306 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9308 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9309 (__v8di) __Y, 2,
9310 (__mmask8) __M);
9313 extern __inline __mmask8
9314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9315 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
9317 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9318 (__v8di) __Y, 2,
9319 (__mmask8) -1);
9322 extern __inline __mmask16
9323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9324 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9326 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9327 (__v16si) __Y, 1,
9328 (__mmask16) __M);
9331 extern __inline __mmask16
9332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9333 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
9335 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9336 (__v16si) __Y, 1,
9337 (__mmask16) -1);
9340 extern __inline __mmask16
9341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9342 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9344 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9345 (__v16si) __Y, 1,
9346 (__mmask16) __M);
9349 extern __inline __mmask16
9350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9351 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
9353 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9354 (__v16si) __Y, 1,
9355 (__mmask16) -1);
9358 extern __inline __mmask8
9359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9360 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9362 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9363 (__v8di) __Y, 1,
9364 (__mmask8) __M);
9367 extern __inline __mmask8
9368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9369 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
9371 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9372 (__v8di) __Y, 1,
9373 (__mmask8) -1);
9376 extern __inline __mmask8
9377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9378 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9380 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9381 (__v8di) __Y, 1,
9382 (__mmask8) __M);
9385 extern __inline __mmask8
9386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9387 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
9389 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9390 (__v8di) __Y, 1,
9391 (__mmask8) -1);
9394 extern __inline __mmask16
9395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9396 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
9398 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9399 (__v16si) __Y, 4,
9400 (__mmask16) -1);
9403 extern __inline __mmask16
9404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9407 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9408 (__v16si) __Y, 4,
9409 (__mmask16) __M);
9412 extern __inline __mmask16
9413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9414 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9416 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9417 (__v16si) __Y, 4,
9418 (__mmask16) __M);
9421 extern __inline __mmask16
9422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9423 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
9425 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9426 (__v16si) __Y, 4,
9427 (__mmask16) -1);
9430 extern __inline __mmask8
9431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9432 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9434 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9435 (__v8di) __Y, 4,
9436 (__mmask8) __M);
9439 extern __inline __mmask8
9440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9441 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9443 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9444 (__v8di) __Y, 4,
9445 (__mmask8) -1);
9448 extern __inline __mmask8
9449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9452 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9453 (__v8di) __Y, 4,
9454 (__mmask8) __M);
9457 extern __inline __mmask8
9458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9459 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9461 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9462 (__v8di) __Y, 4,
9463 (__mmask8) -1);
9466 #define _MM_CMPINT_EQ 0x0
9467 #define _MM_CMPINT_LT 0x1
9468 #define _MM_CMPINT_LE 0x2
9469 #define _MM_CMPINT_UNUSED 0x3
9470 #define _MM_CMPINT_NE 0x4
9471 #define _MM_CMPINT_NLT 0x5
9472 #define _MM_CMPINT_GE 0x5
9473 #define _MM_CMPINT_NLE 0x6
9474 #define _MM_CMPINT_GT 0x6
9476 #ifdef __OPTIMIZE__
9477 extern __inline __mmask16
9478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9479 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
9481 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
9482 (__mmask8) __B);
9485 extern __inline __mmask16
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
9489 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
9490 (__mmask8) __B);
9493 extern __inline __mmask8
9494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9495 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9497 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9498 (__v8di) __Y, __P,
9499 (__mmask8) -1);
9502 extern __inline __mmask16
9503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9504 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9506 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9507 (__v16si) __Y, __P,
9508 (__mmask16) -1);
9511 extern __inline __mmask8
9512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9513 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9515 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9516 (__v8di) __Y, __P,
9517 (__mmask8) -1);
9520 extern __inline __mmask16
9521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9522 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9524 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9525 (__v16si) __Y, __P,
9526 (__mmask16) -1);
9529 extern __inline __mmask8
9530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9531 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9532 const int __R)
9534 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9535 (__v8df) __Y, __P,
9536 (__mmask8) -1, __R);
9539 extern __inline __mmask16
9540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9541 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9543 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9544 (__v16sf) __Y, __P,
9545 (__mmask16) -1, __R);
9548 extern __inline __mmask8
9549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9550 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9551 const int __P)
9553 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9554 (__v8di) __Y, __P,
9555 (__mmask8) __U);
9558 extern __inline __mmask16
9559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9560 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9561 const int __P)
9563 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9564 (__v16si) __Y, __P,
9565 (__mmask16) __U);
9568 extern __inline __mmask8
9569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9570 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9571 const int __P)
9573 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9574 (__v8di) __Y, __P,
9575 (__mmask8) __U);
9578 extern __inline __mmask16
9579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9580 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9581 const int __P)
9583 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9584 (__v16si) __Y, __P,
9585 (__mmask16) __U);
9588 extern __inline __mmask8
9589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9590 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9591 const int __P, const int __R)
9593 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9594 (__v8df) __Y, __P,
9595 (__mmask8) __U, __R);
9598 extern __inline __mmask16
9599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9600 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9601 const int __P, const int __R)
9603 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9604 (__v16sf) __Y, __P,
9605 (__mmask16) __U, __R);
9608 extern __inline __mmask8
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9612 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9613 (__v2df) __Y, __P,
9614 (__mmask8) -1, __R);
9617 extern __inline __mmask8
9618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9619 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9620 const int __P, const int __R)
9622 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9623 (__v2df) __Y, __P,
9624 (__mmask8) __M, __R);
9627 extern __inline __mmask8
9628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9629 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9631 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9632 (__v4sf) __Y, __P,
9633 (__mmask8) -1, __R);
9636 extern __inline __mmask8
9637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9638 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9639 const int __P, const int __R)
9641 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9642 (__v4sf) __Y, __P,
9643 (__mmask8) __M, __R);
9646 #else
9647 #define _kshiftli_mask16(X, Y) \
9648 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
9650 #define _kshiftri_mask16(X, Y) \
9651 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
9653 #define _mm512_cmp_epi64_mask(X, Y, P) \
9654 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9655 (__v8di)(__m512i)(Y), (int)(P),\
9656 (__mmask8)-1))
9658 #define _mm512_cmp_epi32_mask(X, Y, P) \
9659 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9660 (__v16si)(__m512i)(Y), (int)(P), \
9661 (__mmask16)-1))
9663 #define _mm512_cmp_epu64_mask(X, Y, P) \
9664 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9665 (__v8di)(__m512i)(Y), (int)(P),\
9666 (__mmask8)-1))
9668 #define _mm512_cmp_epu32_mask(X, Y, P) \
9669 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9670 (__v16si)(__m512i)(Y), (int)(P), \
9671 (__mmask16)-1))
9673 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9674 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9675 (__v8df)(__m512d)(Y), (int)(P),\
9676 (__mmask8)-1, R))
9678 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9679 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9680 (__v16sf)(__m512)(Y), (int)(P),\
9681 (__mmask16)-1, R))
9683 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9684 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9685 (__v8di)(__m512i)(Y), (int)(P),\
9686 (__mmask8)M))
9688 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9689 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9690 (__v16si)(__m512i)(Y), (int)(P), \
9691 (__mmask16)M))
9693 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9694 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9695 (__v8di)(__m512i)(Y), (int)(P),\
9696 (__mmask8)M))
9698 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9699 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9700 (__v16si)(__m512i)(Y), (int)(P), \
9701 (__mmask16)M))
9703 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9704 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9705 (__v8df)(__m512d)(Y), (int)(P),\
9706 (__mmask8)M, R))
9708 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9709 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9710 (__v16sf)(__m512)(Y), (int)(P),\
9711 (__mmask16)M, R))
9713 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9714 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9715 (__v2df)(__m128d)(Y), (int)(P),\
9716 (__mmask8)-1, R))
9718 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9719 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9720 (__v2df)(__m128d)(Y), (int)(P),\
9721 (M), R))
9723 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9724 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9725 (__v4sf)(__m128)(Y), (int)(P), \
9726 (__mmask8)-1, R))
9728 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9729 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9730 (__v4sf)(__m128)(Y), (int)(P), \
9731 (M), R))
9732 #endif
9734 #ifdef __OPTIMIZE__
9735 extern __inline __m512
9736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9737 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
9739 __m512 __v1_old = _mm512_undefined_ps ();
9740 __mmask16 __mask = 0xFFFF;
9742 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9743 __addr,
9744 (__v16si) __index,
9745 __mask, __scale);
9748 extern __inline __m512
9749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
9751 __m512i __index, void const *__addr, int __scale)
9753 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
9754 __addr,
9755 (__v16si) __index,
9756 __mask, __scale);
9759 extern __inline __m512d
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
9763 __m512d __v1_old = _mm512_undefined_pd ();
9764 __mmask8 __mask = 0xFF;
9766 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9767 __addr,
9768 (__v8si) __index, __mask,
9769 __scale);
9772 extern __inline __m512d
9773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9774 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9775 __m256i __index, void const *__addr, int __scale)
9777 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9778 __addr,
9779 (__v8si) __index,
9780 __mask, __scale);
9783 extern __inline __m256
9784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9785 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
9787 __m256 __v1_old = _mm256_undefined_ps ();
9788 __mmask8 __mask = 0xFF;
9790 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9791 __addr,
9792 (__v8di) __index, __mask,
9793 __scale);
9796 extern __inline __m256
9797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9798 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9799 __m512i __index, void const *__addr, int __scale)
9801 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9802 __addr,
9803 (__v8di) __index,
9804 __mask, __scale);
9807 extern __inline __m512d
9808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9809 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
9811 __m512d __v1_old = _mm512_undefined_pd ();
9812 __mmask8 __mask = 0xFF;
9814 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9815 __addr,
9816 (__v8di) __index, __mask,
9817 __scale);
9820 extern __inline __m512d
9821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9822 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9823 __m512i __index, void const *__addr, int __scale)
9825 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9826 __addr,
9827 (__v8di) __index,
9828 __mask, __scale);
9831 extern __inline __m512i
9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
9835 __m512i __v1_old = _mm512_undefined_epi32 ();
9836 __mmask16 __mask = 0xFFFF;
9838 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9839 __addr,
9840 (__v16si) __index,
9841 __mask, __scale);
9844 extern __inline __m512i
9845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9846 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9847 __m512i __index, void const *__addr, int __scale)
9849 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9850 __addr,
9851 (__v16si) __index,
9852 __mask, __scale);
9855 extern __inline __m512i
9856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9857 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
9859 __m512i __v1_old = _mm512_undefined_epi32 ();
9860 __mmask8 __mask = 0xFF;
9862 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9863 __addr,
9864 (__v8si) __index, __mask,
9865 __scale);
9868 extern __inline __m512i
9869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9871 __m256i __index, void const *__addr,
9872 int __scale)
9874 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9875 __addr,
9876 (__v8si) __index,
9877 __mask, __scale);
9880 extern __inline __m256i
9881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9882 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
9884 __m256i __v1_old = _mm256_undefined_si256 ();
9885 __mmask8 __mask = 0xFF;
9887 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9888 __addr,
9889 (__v8di) __index,
9890 __mask, __scale);
9893 extern __inline __m256i
9894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9895 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9896 __m512i __index, void const *__addr, int __scale)
9898 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9899 __addr,
9900 (__v8di) __index,
9901 __mask, __scale);
9904 extern __inline __m512i
9905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9906 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
9908 __m512i __v1_old = _mm512_undefined_epi32 ();
9909 __mmask8 __mask = 0xFF;
9911 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9912 __addr,
9913 (__v8di) __index, __mask,
9914 __scale);
9917 extern __inline __m512i
9918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9919 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9920 __m512i __index, void const *__addr,
9921 int __scale)
9923 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9924 __addr,
9925 (__v8di) __index,
9926 __mask, __scale);
9929 extern __inline void
9930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9931 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
9933 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9934 (__v16si) __index, (__v16sf) __v1, __scale);
9937 extern __inline void
9938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9939 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
9940 __m512i __index, __m512 __v1, int __scale)
9942 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9943 (__v16sf) __v1, __scale);
9946 extern __inline void
9947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9948 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
9949 int __scale)
9951 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9952 (__v8si) __index, (__v8df) __v1, __scale);
9955 extern __inline void
9956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9957 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
9958 __m256i __index, __m512d __v1, int __scale)
9960 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9961 (__v8df) __v1, __scale);
/* AVX-512F scatter stores: write each selected element of the source
   vector to memory at __addr + __index[i] * __scale.  The unmasked
   forms pass an all-ones mask so every element is stored; the _mask_
   forms store only elements whose mask bit is set.  __scale must be a
   compile-time constant of 1, 2, 4 or 8.  */

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
{
  /* 8 SP elements scattered through 64-bit indices.  */
  __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
				 (__v8di) __index, (__v8sf) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
			   __m512i __index, __m256 __v1, int __scale)
{
  __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
				 (__v8sf) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
		      int __scale)
{
  /* 8 DP elements scattered through 64-bit indices.  */
  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
				(__v8di) __index, (__v8df) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
			   __m512i __index, __m512d __v1, int __scale)
{
  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
				(__v8df) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
			 __m512i __v1, int __scale)
{
  /* 16 dword elements scattered through 32-bit indices.  */
  __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
				 (__v16si) __index, (__v16si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
			      __m512i __index, __m512i __v1, int __scale)
{
  __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
				 (__v16si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
			 __m512i __v1, int __scale)
{
  /* 8 qword elements scattered through 32-bit indices.  */
  __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
				(__v8si) __index, (__v8di) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
			      __m256i __index, __m512i __v1, int __scale)
{
  __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
				(__v8di) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
			 __m256i __v1, int __scale)
{
  /* 8 dword elements scattered through 64-bit indices.  */
  __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
				 (__v8di) __index, (__v8si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
			      __m512i __index, __m256i __v1, int __scale)
{
  __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
				 (__v8si) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
			 __m512i __v1, int __scale)
{
  /* 8 qword elements scattered through 64-bit indices.  */
  __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
				(__v8di) __index, (__v8di) __v1, __scale);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
			      __m512i __index, __m512i __v1, int __scale)
{
  __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
				(__v8di) __v1, __scale);
}
#else
/* Without __OPTIMIZE__ the scale operand cannot be proven to be a
   compile-time constant inside an inline function, so the gather and
   scatter intrinsics are provided as macros that expand the builtin
   call directly.  */

#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
					 (void const *)ADDR,		\
					 (__v16si)(__m512i)INDEX,	\
					 (__mmask16)0xFFFF, (int)SCALE)

#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v16si)(__m512i)INDEX,	\
					 (__mmask16)MASK, (int)SCALE)

#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),	\
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),	\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,		\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),	\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \
					  (void const *)ADDR,		\
					  (__v16si)(__m512i)INDEX,	\
					  (__mmask16)0xFFFF, (int)SCALE)

#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,	\
					  (void const *)ADDR,		\
					  (__v16si)(__m512i)INDEX,	\
					  (__mmask16)MASK, (int)SCALE)

#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8si)(__m256i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			\
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
					  (void const *)ADDR,		\
					  (__v8di)(__m512i)INDEX,	\
					  (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,	\
					  (void const *)ADDR,		\
					  (__v8di)(__m512i)INDEX,	\
					  (__mmask8)MASK, (int)SCALE)

#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)0xFF, (int)SCALE)

#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,	\
					 (void const *)ADDR,		\
					 (__v8di)(__m512i)INDEX,	\
					 (__mmask8)MASK, (int)SCALE)

#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF,	\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16sf)(__m512)V1, (int)SCALE)

#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK,		\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16sf)(__m512)V1, (int)SCALE)

#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8sf)(__m256)V1, (int)SCALE)
/* Scatter the 8 SP elements of V1 selected by MASK through 64-bit
   indices.  Note: the underlying builtin takes a __mmask8 (there are
   only 8 elements), so the mask must be cast to __mmask8, matching
   the inline _mm512_mask_i64scatter_ps definition; the previous
   (__mmask16) cast was inconsistent with the builtin's prototype.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)MASK,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8sf)(__m256)V1, (int)SCALE)
/* Remaining scatter macros: pd/epi32/epi64 element types, 32- or
   64-bit indices, unmasked and write-masked forms.  */

#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8df)(__m512d)V1, (int)SCALE)

#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF,	\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16si)(__m512i)V1, (int)SCALE)

#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK,		\
				 (__v16si)(__m512i)INDEX,		\
				 (__v16si)(__m512i)V1, (int)SCALE)

#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)

#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK,		\
				(__v8si)(__m256i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)

#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8si)(__m256i)V1, (int)SCALE)

#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK,		\
				 (__v8di)(__m512i)INDEX,		\
				 (__v8si)(__m256i)V1, (int)SCALE)

#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)

#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK,		\
				(__v8di)(__m512i)INDEX,			\
				(__v8di)(__m512i)V1, (int)SCALE)
#endif
/* Compress: pack the elements of __A selected by __U contiguously into
   the low positions of the result; remaining positions come from __W
   (_mask_) or are zeroed (_maskz_).  compressstoreu writes the packed
   active elements to unaligned memory at __P.
   Expand is the inverse: consecutive low elements of the source are
   distributed to the positions selected by __U; expandloadu reads the
   consecutive elements from unaligned memory at __P.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
						      (__v8df) __W,
						      (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
						      (__v8df)
						      _mm512_setzero_pd (),
						      (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
					  (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
						     (__v16sf) __W,
						     (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
						     (__v16sf)
						     _mm512_setzero_ps (),
						     (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
					  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
						      (__v8di) __W,
						      (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
					  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
						      (__v16si) __W,
						      (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
					  (__mmask16) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
						    (__v8df) __W,
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
							(__v8df) __W,
							(__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
							 (__v8df)
							 _mm512_setzero_pd (),
							 (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
						       (__v16sf) __W,
						       (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
						        (__v16sf)
						        _mm512_setzero_ps (),
						        (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
						    (__v8di) __W,
						    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
							(__v8di) __W,
							(__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i)
	 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
					       (__v8di)
					       _mm512_setzero_si512 (),
					       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
						    (__v16si) __W,
						    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
							(__v16si) __W,
							(__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
							 (__v16si)
							 _mm512_setzero_si512
							 (), (__mmask16) __U);
}
/* Mask arithmetic operations.  The _k*_mask16 names are the AVX-512
   spelling of the older _mm512_k* mask-register intrinsics; they are
   simple aliases with identical semantics.  */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor
/* 16-bit mask (k-register) operations: kortest (sets ZF if the OR of
   the masks is all zeros, CF if it is all ones), moves between masks
   and scalars/memory, and the bitwise mask logic ops.  */

extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
{
  /* Store the carry-flag result through __CF, return the zero-flag.  */
  *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
  return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
}

extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
{
  return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
						    (__mmask16) __B);
}

extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
{
  return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
						    (__mmask16) __B);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_cvtmask16_u32 (__mmask16 __A)
{
  return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_cvtu32_mask16 (unsigned int __A)
{
  return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_load_mask16 (__mmask16 *__A)
{
  return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_store_mask16 (__mmask16 *__A, __mmask16 __B)
{
  *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  /* Computes ~__A & __B.  */
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
					     (__mmask16) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  /* Nonzero iff (__A | __B) == 0.  */
  return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
						(__mmask16) __B);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  /* Nonzero iff (__A | __B) == 0xFFFF.  */
  return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
						(__mmask16) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_knot (__mmask16 __A)
{
  return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  /* Result is (low byte of __A) << 8 | (low byte of __B).  */
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
#ifdef __OPTIMIZE__
/* Insert the 128-bit lane __D into __C at 128-bit position __imm
   (0..3); the result is merged under the 16-bit element mask.  Inline
   forms require __OPTIMIZE__ so __imm folds to a constant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
			  const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D,
						   __imm,
						   (__v16sf)
						   _mm512_setzero_ps (), __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
			 __m128i __D, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si) __A,
						    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
			 __m128 __D, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D,
						   __imm,
						   (__v16sf) __A, __B);
}
10690 #else
10691 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10692 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10693 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10694 (__mmask8)(A)))
10696 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10697 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10698 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10699 (__mmask8)(A)))
10701 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10702 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10703 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10704 (__mmask8)(B)))
10706 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10707 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10708 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10709 (__mmask8)(B)))
10710 #endif
/* Element-wise integer min/max over 512-bit vectors: signed/unsigned,
   32- and 64-bit elements.  Plain forms compute all lanes (destination
   is undefined input with an all-ones mask); _mask_ forms merge from
   __W, _maskz_ forms zero inactive lanes.  */

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Interleave the low halves of each 128-bit lane of __A and __B.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U);
}
#ifdef __OPTIMIZE__
/* Scalar double/single max and min with an explicit rounding mode __R.
   The upper element(s) of the result are copied from __A; masked forms
   blend the low element from __W (or zero) on a clear mask bit.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
					       (__v2df) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
					      (__v4sf) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
					       (__v2df) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
					      (__v4sf) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U, __R);
}

#else
/* Without optimization the rounding-mode argument cannot be proven
   constant inside an inline function, so macro forms are used instead.
   Each argument and the whole expansion are parenthesized so the macros
   expand safely inside larger expressions.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round ((A), (B), (C)))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_max_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((A), (B), \
					      (__v2df) _mm_setzero_pd (), \
					      (U), (C)))

#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round ((A), (B), (C)))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_max_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((A), (B), \
					     (__v4sf) _mm_setzero_ps (), \
					     (U), (C)))

#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round ((A), (B), (C)))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_min_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((A), (B), \
					      (__v2df) _mm_setzero_pd (), \
					      (U), (C)))

#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round ((A), (B), (C)))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_min_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((A), (B), \
					     (__v4sf) _mm_setzero_ps (), \
					     (U), (C)))
#endif
11160 extern __inline __m512d
11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11162 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
11164 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
11165 (__v8df) __W,
11166 (__mmask8) __U);
11169 extern __inline __m512
11170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11171 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
11173 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
11174 (__v16sf) __W,
11175 (__mmask16) __U);
11178 extern __inline __m512i
11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
11182 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
11183 (__v8di) __W,
11184 (__mmask8) __U);
11187 extern __inline __m512i
11188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11189 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
11191 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
11192 (__v16si) __W,
11193 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Scalar fused multiply-add/sub with an explicit rounding mode __R.
   Operand negation selects the fms/fnma/fnms variants of the single
   vfmaddsd3/vfmaddss3 builtin.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   (__v2df) __A,
						   (__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  (__v4sf) __A,
						  (__v4sf) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   (__v2df) __A,
						   -(__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  (__v4sf) __A,
						  -(__v4sf) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   -(__v2df) __A,
						   (__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  -(__v4sf) __A,
						  (__v4sf) __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
						   -(__v2df) __A,
						   -(__v2df) __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
						  -(__v4sf) __A,
						  -(__v4sf) __B, __R);
}
#else
/* Macro versions for when __R cannot be proven constant.  Arguments and
   the whole expansion are parenthesized for safe expansion inside larger
   expressions.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), (C), (R)))

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), (C), (R)))

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), (B), -(C), (R)))

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), (B), -(C), (R)))

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), (C), (R)))

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), (C), (R)))

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((A), -(B), -(C), (R)))

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((A), -(B), -(C), (R)))
#endif
#ifdef __OPTIMIZE__
/* Ordered/unordered scalar compare-to-flags with predicate __P and
   rounding/SAE control __R; returns the comparison result as an int.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
}
#else
/* Macro versions with fully parenthesized arguments and expansion.  */
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss ((A), (B), (C), (D)))
#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd ((A), (B), (C), (D)))
#endif
11323 extern __inline __m512d
11324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11325 _mm512_sqrt_pd (__m512d __A)
11327 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11328 (__v8df)
11329 _mm512_undefined_pd (),
11330 (__mmask8) -1,
11331 _MM_FROUND_CUR_DIRECTION);
11334 extern __inline __m512d
11335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11336 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
11338 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11339 (__v8df) __W,
11340 (__mmask8) __U,
11341 _MM_FROUND_CUR_DIRECTION);
11344 extern __inline __m512d
11345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11346 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
11348 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
11349 (__v8df)
11350 _mm512_setzero_pd (),
11351 (__mmask8) __U,
11352 _MM_FROUND_CUR_DIRECTION);
11355 extern __inline __m512
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm512_sqrt_ps (__m512 __A)
11359 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11360 (__v16sf)
11361 _mm512_undefined_ps (),
11362 (__mmask16) -1,
11363 _MM_FROUND_CUR_DIRECTION);
11366 extern __inline __m512
11367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
11370 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11371 (__v16sf) __W,
11372 (__mmask16) __U,
11373 _MM_FROUND_CUR_DIRECTION);
11376 extern __inline __m512
11377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
11380 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
11381 (__v16sf)
11382 _mm512_setzero_ps (),
11383 (__mmask16) __U,
11384 _MM_FROUND_CUR_DIRECTION);
11387 extern __inline __m512d
11388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11389 _mm512_add_pd (__m512d __A, __m512d __B)
11391 return (__m512d) ((__v8df)__A + (__v8df)__B);
11394 extern __inline __m512d
11395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11396 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11398 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11399 (__v8df) __B,
11400 (__v8df) __W,
11401 (__mmask8) __U,
11402 _MM_FROUND_CUR_DIRECTION);
11405 extern __inline __m512d
11406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11407 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
11409 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
11410 (__v8df) __B,
11411 (__v8df)
11412 _mm512_setzero_pd (),
11413 (__mmask8) __U,
11414 _MM_FROUND_CUR_DIRECTION);
11417 extern __inline __m512
11418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11419 _mm512_add_ps (__m512 __A, __m512 __B)
11421 return (__m512) ((__v16sf)__A + (__v16sf)__B);
11424 extern __inline __m512
11425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11426 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11428 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11429 (__v16sf) __B,
11430 (__v16sf) __W,
11431 (__mmask16) __U,
11432 _MM_FROUND_CUR_DIRECTION);
11435 extern __inline __m512
11436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11437 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
11439 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
11440 (__v16sf) __B,
11441 (__v16sf)
11442 _mm512_setzero_ps (),
11443 (__mmask16) __U,
11444 _MM_FROUND_CUR_DIRECTION);
11447 extern __inline __m128d
11448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11449 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11451 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11452 (__v2df) __B,
11453 (__v2df) __W,
11454 (__mmask8) __U,
11455 _MM_FROUND_CUR_DIRECTION);
11458 extern __inline __m128d
11459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11460 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
11462 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
11463 (__v2df) __B,
11464 (__v2df)
11465 _mm_setzero_pd (),
11466 (__mmask8) __U,
11467 _MM_FROUND_CUR_DIRECTION);
11470 extern __inline __m128
11471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11474 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11475 (__v4sf) __B,
11476 (__v4sf) __W,
11477 (__mmask8) __U,
11478 _MM_FROUND_CUR_DIRECTION);
11481 extern __inline __m128
11482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11483 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
11485 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
11486 (__v4sf) __B,
11487 (__v4sf)
11488 _mm_setzero_ps (),
11489 (__mmask8) __U,
11490 _MM_FROUND_CUR_DIRECTION);
11493 extern __inline __m512d
11494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11495 _mm512_sub_pd (__m512d __A, __m512d __B)
11497 return (__m512d) ((__v8df)__A - (__v8df)__B);
11500 extern __inline __m512d
11501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11502 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11504 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11505 (__v8df) __B,
11506 (__v8df) __W,
11507 (__mmask8) __U,
11508 _MM_FROUND_CUR_DIRECTION);
11511 extern __inline __m512d
11512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11513 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
11515 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
11516 (__v8df) __B,
11517 (__v8df)
11518 _mm512_setzero_pd (),
11519 (__mmask8) __U,
11520 _MM_FROUND_CUR_DIRECTION);
11523 extern __inline __m512
11524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11525 _mm512_sub_ps (__m512 __A, __m512 __B)
11527 return (__m512) ((__v16sf)__A - (__v16sf)__B);
11530 extern __inline __m512
11531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11534 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11535 (__v16sf) __B,
11536 (__v16sf) __W,
11537 (__mmask16) __U,
11538 _MM_FROUND_CUR_DIRECTION);
11541 extern __inline __m512
11542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
11545 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
11546 (__v16sf) __B,
11547 (__v16sf)
11548 _mm512_setzero_ps (),
11549 (__mmask16) __U,
11550 _MM_FROUND_CUR_DIRECTION);
11553 extern __inline __m128d
11554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11555 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11557 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11558 (__v2df) __B,
11559 (__v2df) __W,
11560 (__mmask8) __U,
11561 _MM_FROUND_CUR_DIRECTION);
11564 extern __inline __m128d
11565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11566 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
11568 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
11569 (__v2df) __B,
11570 (__v2df)
11571 _mm_setzero_pd (),
11572 (__mmask8) __U,
11573 _MM_FROUND_CUR_DIRECTION);
11576 extern __inline __m128
11577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11578 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11580 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11581 (__v4sf) __B,
11582 (__v4sf) __W,
11583 (__mmask8) __U,
11584 _MM_FROUND_CUR_DIRECTION);
11587 extern __inline __m128
11588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11589 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
11591 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
11592 (__v4sf) __B,
11593 (__v4sf)
11594 _mm_setzero_ps (),
11595 (__mmask8) __U,
11596 _MM_FROUND_CUR_DIRECTION);
11599 extern __inline __m512d
11600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11601 _mm512_mul_pd (__m512d __A, __m512d __B)
11603 return (__m512d) ((__v8df)__A * (__v8df)__B);
11606 extern __inline __m512d
11607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11608 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11610 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11611 (__v8df) __B,
11612 (__v8df) __W,
11613 (__mmask8) __U,
11614 _MM_FROUND_CUR_DIRECTION);
11617 extern __inline __m512d
11618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11619 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
11621 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
11622 (__v8df) __B,
11623 (__v8df)
11624 _mm512_setzero_pd (),
11625 (__mmask8) __U,
11626 _MM_FROUND_CUR_DIRECTION);
11629 extern __inline __m512
11630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11631 _mm512_mul_ps (__m512 __A, __m512 __B)
11633 return (__m512) ((__v16sf)__A * (__v16sf)__B);
11636 extern __inline __m512
11637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11638 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11640 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11641 (__v16sf) __B,
11642 (__v16sf) __W,
11643 (__mmask16) __U,
11644 _MM_FROUND_CUR_DIRECTION);
11647 extern __inline __m512
11648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11649 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
11651 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
11652 (__v16sf) __B,
11653 (__v16sf)
11654 _mm512_setzero_ps (),
11655 (__mmask16) __U,
11656 _MM_FROUND_CUR_DIRECTION);
11659 extern __inline __m128d
11660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11661 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
11662 __m128d __B)
11664 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11665 (__v2df) __B,
11666 (__v2df) __W,
11667 (__mmask8) __U,
11668 _MM_FROUND_CUR_DIRECTION);
11671 extern __inline __m128d
11672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11673 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
11675 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
11676 (__v2df) __B,
11677 (__v2df)
11678 _mm_setzero_pd (),
11679 (__mmask8) __U,
11680 _MM_FROUND_CUR_DIRECTION);
11683 extern __inline __m128
11684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11685 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
11686 __m128 __B)
11688 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11689 (__v4sf) __B,
11690 (__v4sf) __W,
11691 (__mmask8) __U,
11692 _MM_FROUND_CUR_DIRECTION);
11695 extern __inline __m128
11696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11697 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
11699 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
11700 (__v4sf) __B,
11701 (__v4sf)
11702 _mm_setzero_ps (),
11703 (__mmask8) __U,
11704 _MM_FROUND_CUR_DIRECTION);
11707 extern __inline __m512d
11708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11709 _mm512_div_pd (__m512d __M, __m512d __V)
11711 return (__m512d) ((__v8df)__M / (__v8df)__V);
11714 extern __inline __m512d
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
11718 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11719 (__v8df) __V,
11720 (__v8df) __W,
11721 (__mmask8) __U,
11722 _MM_FROUND_CUR_DIRECTION);
11725 extern __inline __m512d
11726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
11729 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
11730 (__v8df) __V,
11731 (__v8df)
11732 _mm512_setzero_pd (),
11733 (__mmask8) __U,
11734 _MM_FROUND_CUR_DIRECTION);
11737 extern __inline __m512
11738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11739 _mm512_div_ps (__m512 __A, __m512 __B)
11741 return (__m512) ((__v16sf)__A / (__v16sf)__B);
11744 extern __inline __m512
11745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11746 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11748 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11749 (__v16sf) __B,
11750 (__v16sf) __W,
11751 (__mmask16) __U,
11752 _MM_FROUND_CUR_DIRECTION);
11755 extern __inline __m512
11756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11757 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11759 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11760 (__v16sf) __B,
11761 (__v16sf)
11762 _mm512_setzero_ps (),
11763 (__mmask16) __U,
11764 _MM_FROUND_CUR_DIRECTION);
11767 extern __inline __m128d
11768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11769 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
11770 __m128d __B)
11772 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11773 (__v2df) __B,
11774 (__v2df) __W,
11775 (__mmask8) __U,
11776 _MM_FROUND_CUR_DIRECTION);
11779 extern __inline __m128d
11780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11781 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
11783 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
11784 (__v2df) __B,
11785 (__v2df)
11786 _mm_setzero_pd (),
11787 (__mmask8) __U,
11788 _MM_FROUND_CUR_DIRECTION);
11791 extern __inline __m128
11792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
11794 __m128 __B)
11796 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11797 (__v4sf) __B,
11798 (__v4sf) __W,
11799 (__mmask8) __U,
11800 _MM_FROUND_CUR_DIRECTION);
11803 extern __inline __m128
11804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
11807 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
11808 (__v4sf) __B,
11809 (__v4sf)
11810 _mm_setzero_ps (),
11811 (__mmask8) __U,
11812 _MM_FROUND_CUR_DIRECTION);
11815 extern __inline __m512d
11816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11817 _mm512_max_pd (__m512d __A, __m512d __B)
11819 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11820 (__v8df) __B,
11821 (__v8df)
11822 _mm512_undefined_pd (),
11823 (__mmask8) -1,
11824 _MM_FROUND_CUR_DIRECTION);
11827 extern __inline __m512d
11828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11829 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11831 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11832 (__v8df) __B,
11833 (__v8df) __W,
11834 (__mmask8) __U,
11835 _MM_FROUND_CUR_DIRECTION);
11838 extern __inline __m512d
11839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11840 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11842 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11843 (__v8df) __B,
11844 (__v8df)
11845 _mm512_setzero_pd (),
11846 (__mmask8) __U,
11847 _MM_FROUND_CUR_DIRECTION);
11850 extern __inline __m512
11851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11852 _mm512_max_ps (__m512 __A, __m512 __B)
11854 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11855 (__v16sf) __B,
11856 (__v16sf)
11857 _mm512_undefined_ps (),
11858 (__mmask16) -1,
11859 _MM_FROUND_CUR_DIRECTION);
11862 extern __inline __m512
11863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11864 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11866 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11867 (__v16sf) __B,
11868 (__v16sf) __W,
11869 (__mmask16) __U,
11870 _MM_FROUND_CUR_DIRECTION);
11873 extern __inline __m512
11874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11875 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11877 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11878 (__v16sf) __B,
11879 (__v16sf)
11880 _mm512_setzero_ps (),
11881 (__mmask16) __U,
11882 _MM_FROUND_CUR_DIRECTION);
11885 extern __inline __m128d
11886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11887 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
11889 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11890 (__v2df) __B,
11891 (__v2df) __W,
11892 (__mmask8) __U,
11893 _MM_FROUND_CUR_DIRECTION);
11896 extern __inline __m128d
11897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11898 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
11900 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
11901 (__v2df) __B,
11902 (__v2df)
11903 _mm_setzero_pd (),
11904 (__mmask8) __U,
11905 _MM_FROUND_CUR_DIRECTION);
11908 extern __inline __m128
11909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
11912 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11913 (__v4sf) __B,
11914 (__v4sf) __W,
11915 (__mmask8) __U,
11916 _MM_FROUND_CUR_DIRECTION);
11919 extern __inline __m128
11920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11921 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
11923 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
11924 (__v4sf) __B,
11925 (__v4sf)
11926 _mm_setzero_ps (),
11927 (__mmask8) __U,
11928 _MM_FROUND_CUR_DIRECTION);
11931 extern __inline __m512d
11932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11933 _mm512_min_pd (__m512d __A, __m512d __B)
11935 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11936 (__v8df) __B,
11937 (__v8df)
11938 _mm512_undefined_pd (),
11939 (__mmask8) -1,
11940 _MM_FROUND_CUR_DIRECTION);
11943 extern __inline __m512d
11944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11945 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11947 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11948 (__v8df) __B,
11949 (__v8df) __W,
11950 (__mmask8) __U,
11951 _MM_FROUND_CUR_DIRECTION);
11954 extern __inline __m512d
11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11958 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11959 (__v8df) __B,
11960 (__v8df)
11961 _mm512_setzero_pd (),
11962 (__mmask8) __U,
11963 _MM_FROUND_CUR_DIRECTION);
11966 extern __inline __m512
11967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968 _mm512_min_ps (__m512 __A, __m512 __B)
11970 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11971 (__v16sf) __B,
11972 (__v16sf)
11973 _mm512_undefined_ps (),
11974 (__mmask16) -1,
11975 _MM_FROUND_CUR_DIRECTION);
11978 extern __inline __m512
11979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11980 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11982 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11983 (__v16sf) __B,
11984 (__v16sf) __W,
11985 (__mmask16) __U,
11986 _MM_FROUND_CUR_DIRECTION);
11989 extern __inline __m512
11990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11991 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11993 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11994 (__v16sf) __B,
11995 (__v16sf)
11996 _mm512_setzero_ps (),
11997 (__mmask16) __U,
11998 _MM_FROUND_CUR_DIRECTION);
12001 extern __inline __m128d
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
12005 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12006 (__v2df) __B,
12007 (__v2df) __W,
12008 (__mmask8) __U,
12009 _MM_FROUND_CUR_DIRECTION);
12012 extern __inline __m128d
12013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
12016 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
12017 (__v2df) __B,
12018 (__v2df)
12019 _mm_setzero_pd (),
12020 (__mmask8) __U,
12021 _MM_FROUND_CUR_DIRECTION);
12024 extern __inline __m128
12025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
12028 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12029 (__v4sf) __B,
12030 (__v4sf) __W,
12031 (__mmask8) __U,
12032 _MM_FROUND_CUR_DIRECTION);
12035 extern __inline __m128
12036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12037 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
12039 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
12040 (__v4sf) __B,
12041 (__v4sf)
12042 _mm_setzero_ps (),
12043 (__mmask8) __U,
12044 _MM_FROUND_CUR_DIRECTION);
12047 extern __inline __m512d
12048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12049 _mm512_scalef_pd (__m512d __A, __m512d __B)
12051 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12052 (__v8df) __B,
12053 (__v8df)
12054 _mm512_undefined_pd (),
12055 (__mmask8) -1,
12056 _MM_FROUND_CUR_DIRECTION);
12059 extern __inline __m512d
12060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12061 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
12063 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12064 (__v8df) __B,
12065 (__v8df) __W,
12066 (__mmask8) __U,
12067 _MM_FROUND_CUR_DIRECTION);
12070 extern __inline __m512d
12071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
12074 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
12075 (__v8df) __B,
12076 (__v8df)
12077 _mm512_setzero_pd (),
12078 (__mmask8) __U,
12079 _MM_FROUND_CUR_DIRECTION);
12082 extern __inline __m512
12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084 _mm512_scalef_ps (__m512 __A, __m512 __B)
12086 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12087 (__v16sf) __B,
12088 (__v16sf)
12089 _mm512_undefined_ps (),
12090 (__mmask16) -1,
12091 _MM_FROUND_CUR_DIRECTION);
12094 extern __inline __m512
12095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
12098 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12099 (__v16sf) __B,
12100 (__v16sf) __W,
12101 (__mmask16) __U,
12102 _MM_FROUND_CUR_DIRECTION);
12105 extern __inline __m512
12106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12107 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
12109 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
12110 (__v16sf) __B,
12111 (__v16sf)
12112 _mm512_setzero_ps (),
12113 (__mmask16) __U,
12114 _MM_FROUND_CUR_DIRECTION);
12117 extern __inline __m128d
12118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12119 _mm_scalef_sd (__m128d __A, __m128d __B)
12121 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
12122 (__v2df) __B,
12123 _MM_FROUND_CUR_DIRECTION);
12126 extern __inline __m128
12127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12128 _mm_scalef_ss (__m128 __A, __m128 __B)
12130 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
12131 (__v4sf) __B,
12132 _MM_FROUND_CUR_DIRECTION);
12135 extern __inline __m512d
12136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12137 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12139 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12140 (__v8df) __B,
12141 (__v8df) __C,
12142 (__mmask8) -1,
12143 _MM_FROUND_CUR_DIRECTION);
12146 extern __inline __m512d
12147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12148 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12150 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12151 (__v8df) __B,
12152 (__v8df) __C,
12153 (__mmask8) __U,
12154 _MM_FROUND_CUR_DIRECTION);
12157 extern __inline __m512d
12158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12159 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12161 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
12162 (__v8df) __B,
12163 (__v8df) __C,
12164 (__mmask8) __U,
12165 _MM_FROUND_CUR_DIRECTION);
12168 extern __inline __m512d
12169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12170 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12172 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12173 (__v8df) __B,
12174 (__v8df) __C,
12175 (__mmask8) __U,
12176 _MM_FROUND_CUR_DIRECTION);
12179 extern __inline __m512
12180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12181 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12183 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12184 (__v16sf) __B,
12185 (__v16sf) __C,
12186 (__mmask16) -1,
12187 _MM_FROUND_CUR_DIRECTION);
12190 extern __inline __m512
12191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12192 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12194 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12195 (__v16sf) __B,
12196 (__v16sf) __C,
12197 (__mmask16) __U,
12198 _MM_FROUND_CUR_DIRECTION);
12201 extern __inline __m512
12202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12203 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12205 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
12206 (__v16sf) __B,
12207 (__v16sf) __C,
12208 (__mmask16) __U,
12209 _MM_FROUND_CUR_DIRECTION);
12212 extern __inline __m512
12213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12214 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12216 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12217 (__v16sf) __B,
12218 (__v16sf) __C,
12219 (__mmask16) __U,
12220 _MM_FROUND_CUR_DIRECTION);
12223 extern __inline __m512d
12224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12225 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12227 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12228 (__v8df) __B,
12229 -(__v8df) __C,
12230 (__mmask8) -1,
12231 _MM_FROUND_CUR_DIRECTION);
12234 extern __inline __m512d
12235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12236 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12238 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
12239 (__v8df) __B,
12240 -(__v8df) __C,
12241 (__mmask8) __U,
12242 _MM_FROUND_CUR_DIRECTION);
12245 extern __inline __m512d
12246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12247 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12249 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
12250 (__v8df) __B,
12251 (__v8df) __C,
12252 (__mmask8) __U,
12253 _MM_FROUND_CUR_DIRECTION);
12256 extern __inline __m512d
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12260 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
12261 (__v8df) __B,
12262 -(__v8df) __C,
12263 (__mmask8) __U,
12264 _MM_FROUND_CUR_DIRECTION);
12267 extern __inline __m512
12268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12271 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12272 (__v16sf) __B,
12273 -(__v16sf) __C,
12274 (__mmask16) -1,
12275 _MM_FROUND_CUR_DIRECTION);
12278 extern __inline __m512
12279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12282 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
12283 (__v16sf) __B,
12284 -(__v16sf) __C,
12285 (__mmask16) __U,
12286 _MM_FROUND_CUR_DIRECTION);
12289 extern __inline __m512
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12293 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
12294 (__v16sf) __B,
12295 (__v16sf) __C,
12296 (__mmask16) __U,
12297 _MM_FROUND_CUR_DIRECTION);
12300 extern __inline __m512
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12304 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
12305 (__v16sf) __B,
12306 -(__v16sf) __C,
12307 (__mmask16) __U,
12308 _MM_FROUND_CUR_DIRECTION);
12311 extern __inline __m512d
12312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12313 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
12315 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12316 (__v8df) __B,
12317 (__v8df) __C,
12318 (__mmask8) -1,
12319 _MM_FROUND_CUR_DIRECTION);
12322 extern __inline __m512d
12323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12324 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12326 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12327 (__v8df) __B,
12328 (__v8df) __C,
12329 (__mmask8) __U,
12330 _MM_FROUND_CUR_DIRECTION);
12333 extern __inline __m512d
12334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12335 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12337 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
12338 (__v8df) __B,
12339 (__v8df) __C,
12340 (__mmask8) __U,
12341 _MM_FROUND_CUR_DIRECTION);
12344 extern __inline __m512d
12345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12346 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12348 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12349 (__v8df) __B,
12350 (__v8df) __C,
12351 (__mmask8) __U,
12352 _MM_FROUND_CUR_DIRECTION);
12355 extern __inline __m512
12356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12357 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
12359 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12360 (__v16sf) __B,
12361 (__v16sf) __C,
12362 (__mmask16) -1,
12363 _MM_FROUND_CUR_DIRECTION);
12366 extern __inline __m512
12367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12368 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12370 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12371 (__v16sf) __B,
12372 (__v16sf) __C,
12373 (__mmask16) __U,
12374 _MM_FROUND_CUR_DIRECTION);
12377 extern __inline __m512
12378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12379 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12381 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
12382 (__v16sf) __B,
12383 (__v16sf) __C,
12384 (__mmask16) __U,
12385 _MM_FROUND_CUR_DIRECTION);
12388 extern __inline __m512
12389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12390 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12392 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12393 (__v16sf) __B,
12394 (__v16sf) __C,
12395 (__mmask16) __U,
12396 _MM_FROUND_CUR_DIRECTION);
12399 extern __inline __m512d
12400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12401 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
12403 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12404 (__v8df) __B,
12405 -(__v8df) __C,
12406 (__mmask8) -1,
12407 _MM_FROUND_CUR_DIRECTION);
12410 extern __inline __m512d
12411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12412 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12414 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
12415 (__v8df) __B,
12416 -(__v8df) __C,
12417 (__mmask8) __U,
12418 _MM_FROUND_CUR_DIRECTION);
12421 extern __inline __m512d
12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12423 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12425 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
12426 (__v8df) __B,
12427 (__v8df) __C,
12428 (__mmask8) __U,
12429 _MM_FROUND_CUR_DIRECTION);
12432 extern __inline __m512d
12433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12434 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12436 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
12437 (__v8df) __B,
12438 -(__v8df) __C,
12439 (__mmask8) __U,
12440 _MM_FROUND_CUR_DIRECTION);
12443 extern __inline __m512
12444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12445 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
12447 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12448 (__v16sf) __B,
12449 -(__v16sf) __C,
12450 (__mmask16) -1,
12451 _MM_FROUND_CUR_DIRECTION);
12454 extern __inline __m512
12455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12456 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12458 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
12459 (__v16sf) __B,
12460 -(__v16sf) __C,
12461 (__mmask16) __U,
12462 _MM_FROUND_CUR_DIRECTION);
12465 extern __inline __m512
12466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12467 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12469 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
12470 (__v16sf) __B,
12471 (__v16sf) __C,
12472 (__mmask16) __U,
12473 _MM_FROUND_CUR_DIRECTION);
12476 extern __inline __m512
12477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12478 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12480 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
12481 (__v16sf) __B,
12482 -(__v16sf) __C,
12483 (__mmask16) __U,
12484 _MM_FROUND_CUR_DIRECTION);
12487 extern __inline __m512d
12488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12489 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
12491 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12492 (__v8df) __B,
12493 (__v8df) __C,
12494 (__mmask8) -1,
12495 _MM_FROUND_CUR_DIRECTION);
12498 extern __inline __m512d
12499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12500 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12502 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
12503 (__v8df) __B,
12504 (__v8df) __C,
12505 (__mmask8) __U,
12506 _MM_FROUND_CUR_DIRECTION);
12509 extern __inline __m512d
12510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12511 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12513 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
12514 (__v8df) __B,
12515 (__v8df) __C,
12516 (__mmask8) __U,
12517 _MM_FROUND_CUR_DIRECTION);
12520 extern __inline __m512d
12521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12522 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12524 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12525 (__v8df) __B,
12526 (__v8df) __C,
12527 (__mmask8) __U,
12528 _MM_FROUND_CUR_DIRECTION);
12531 extern __inline __m512
12532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12533 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
12535 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12536 (__v16sf) __B,
12537 (__v16sf) __C,
12538 (__mmask16) -1,
12539 _MM_FROUND_CUR_DIRECTION);
12542 extern __inline __m512
12543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12544 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12546 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
12547 (__v16sf) __B,
12548 (__v16sf) __C,
12549 (__mmask16) __U,
12550 _MM_FROUND_CUR_DIRECTION);
12553 extern __inline __m512
12554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12555 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12557 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
12558 (__v16sf) __B,
12559 (__v16sf) __C,
12560 (__mmask16) __U,
12561 _MM_FROUND_CUR_DIRECTION);
12564 extern __inline __m512
12565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12566 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12568 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12569 (__v16sf) __B,
12570 (__v16sf) __C,
12571 (__mmask16) __U,
12572 _MM_FROUND_CUR_DIRECTION);
12575 extern __inline __m512d
12576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12577 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
12579 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
12580 (__v8df) __B,
12581 -(__v8df) __C,
12582 (__mmask8) -1,
12583 _MM_FROUND_CUR_DIRECTION);
12586 extern __inline __m512d
12587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12588 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
12590 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
12591 (__v8df) __B,
12592 (__v8df) __C,
12593 (__mmask8) __U,
12594 _MM_FROUND_CUR_DIRECTION);
12597 extern __inline __m512d
12598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12599 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
12601 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
12602 (__v8df) __B,
12603 (__v8df) __C,
12604 (__mmask8) __U,
12605 _MM_FROUND_CUR_DIRECTION);
12608 extern __inline __m512d
12609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12610 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
12612 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
12613 (__v8df) __B,
12614 -(__v8df) __C,
12615 (__mmask8) __U,
12616 _MM_FROUND_CUR_DIRECTION);
12619 extern __inline __m512
12620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12621 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
12623 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
12624 (__v16sf) __B,
12625 -(__v16sf) __C,
12626 (__mmask16) -1,
12627 _MM_FROUND_CUR_DIRECTION);
12630 extern __inline __m512
12631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12632 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
12634 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
12635 (__v16sf) __B,
12636 (__v16sf) __C,
12637 (__mmask16) __U,
12638 _MM_FROUND_CUR_DIRECTION);
12641 extern __inline __m512
12642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12643 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
12645 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
12646 (__v16sf) __B,
12647 (__v16sf) __C,
12648 (__mmask16) __U,
12649 _MM_FROUND_CUR_DIRECTION);
12652 extern __inline __m512
12653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12654 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
12656 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
12657 (__v16sf) __B,
12658 -(__v16sf) __C,
12659 (__mmask16) __U,
12660 _MM_FROUND_CUR_DIRECTION);
12663 extern __inline __m256i
12664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12665 _mm512_cvttpd_epi32 (__m512d __A)
12667 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12668 (__v8si)
12669 _mm256_undefined_si256 (),
12670 (__mmask8) -1,
12671 _MM_FROUND_CUR_DIRECTION);
12674 extern __inline __m256i
12675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12676 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12678 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12679 (__v8si) __W,
12680 (__mmask8) __U,
12681 _MM_FROUND_CUR_DIRECTION);
12684 extern __inline __m256i
12685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12686 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
12688 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
12689 (__v8si)
12690 _mm256_setzero_si256 (),
12691 (__mmask8) __U,
12692 _MM_FROUND_CUR_DIRECTION);
12695 extern __inline __m256i
12696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12697 _mm512_cvttpd_epu32 (__m512d __A)
12699 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12700 (__v8si)
12701 _mm256_undefined_si256 (),
12702 (__mmask8) -1,
12703 _MM_FROUND_CUR_DIRECTION);
12706 extern __inline __m256i
12707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12708 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12710 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12711 (__v8si) __W,
12712 (__mmask8) __U,
12713 _MM_FROUND_CUR_DIRECTION);
12716 extern __inline __m256i
12717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12718 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
12720 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
12721 (__v8si)
12722 _mm256_setzero_si256 (),
12723 (__mmask8) __U,
12724 _MM_FROUND_CUR_DIRECTION);
12727 extern __inline __m256i
12728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12729 _mm512_cvtpd_epi32 (__m512d __A)
12731 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12732 (__v8si)
12733 _mm256_undefined_si256 (),
12734 (__mmask8) -1,
12735 _MM_FROUND_CUR_DIRECTION);
12738 extern __inline __m256i
12739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12740 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
12742 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12743 (__v8si) __W,
12744 (__mmask8) __U,
12745 _MM_FROUND_CUR_DIRECTION);
12748 extern __inline __m256i
12749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12750 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
12752 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
12753 (__v8si)
12754 _mm256_setzero_si256 (),
12755 (__mmask8) __U,
12756 _MM_FROUND_CUR_DIRECTION);
12759 extern __inline __m256i
12760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12761 _mm512_cvtpd_epu32 (__m512d __A)
12763 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12764 (__v8si)
12765 _mm256_undefined_si256 (),
12766 (__mmask8) -1,
12767 _MM_FROUND_CUR_DIRECTION);
12770 extern __inline __m256i
12771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12772 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
12774 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12775 (__v8si) __W,
12776 (__mmask8) __U,
12777 _MM_FROUND_CUR_DIRECTION);
12780 extern __inline __m256i
12781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12782 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
12784 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
12785 (__v8si)
12786 _mm256_setzero_si256 (),
12787 (__mmask8) __U,
12788 _MM_FROUND_CUR_DIRECTION);
12791 extern __inline __m512i
12792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12793 _mm512_cvttps_epi32 (__m512 __A)
12795 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12796 (__v16si)
12797 _mm512_undefined_epi32 (),
12798 (__mmask16) -1,
12799 _MM_FROUND_CUR_DIRECTION);
12802 extern __inline __m512i
12803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12804 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12806 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12807 (__v16si) __W,
12808 (__mmask16) __U,
12809 _MM_FROUND_CUR_DIRECTION);
12812 extern __inline __m512i
12813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12814 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
12816 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
12817 (__v16si)
12818 _mm512_setzero_si512 (),
12819 (__mmask16) __U,
12820 _MM_FROUND_CUR_DIRECTION);
12823 extern __inline __m512i
12824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12825 _mm512_cvttps_epu32 (__m512 __A)
12827 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12828 (__v16si)
12829 _mm512_undefined_epi32 (),
12830 (__mmask16) -1,
12831 _MM_FROUND_CUR_DIRECTION);
12834 extern __inline __m512i
12835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12836 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12838 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12839 (__v16si) __W,
12840 (__mmask16) __U,
12841 _MM_FROUND_CUR_DIRECTION);
12844 extern __inline __m512i
12845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12846 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
12848 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
12849 (__v16si)
12850 _mm512_setzero_si512 (),
12851 (__mmask16) __U,
12852 _MM_FROUND_CUR_DIRECTION);
12855 extern __inline __m512i
12856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12857 _mm512_cvtps_epi32 (__m512 __A)
12859 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12860 (__v16si)
12861 _mm512_undefined_epi32 (),
12862 (__mmask16) -1,
12863 _MM_FROUND_CUR_DIRECTION);
12866 extern __inline __m512i
12867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12868 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
12870 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12871 (__v16si) __W,
12872 (__mmask16) __U,
12873 _MM_FROUND_CUR_DIRECTION);
12876 extern __inline __m512i
12877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12878 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
12880 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12881 (__v16si)
12882 _mm512_setzero_si512 (),
12883 (__mmask16) __U,
12884 _MM_FROUND_CUR_DIRECTION);
12887 extern __inline __m512i
12888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12889 _mm512_cvtps_epu32 (__m512 __A)
12891 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12892 (__v16si)
12893 _mm512_undefined_epi32 (),
12894 (__mmask16) -1,
12895 _MM_FROUND_CUR_DIRECTION);
12898 extern __inline __m512i
12899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12900 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12902 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12903 (__v16si) __W,
12904 (__mmask16) __U,
12905 _MM_FROUND_CUR_DIRECTION);
12908 extern __inline __m512i
12909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12910 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
12912 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12913 (__v16si)
12914 _mm512_setzero_si512 (),
12915 (__mmask16) __U,
12916 _MM_FROUND_CUR_DIRECTION);
12919 extern __inline double
12920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12921 _mm512_cvtsd_f64 (__m512d __A)
12923 return __A[0];
12926 extern __inline float
12927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12928 _mm512_cvtss_f32 (__m512 __A)
12930 return __A[0];
12933 #ifdef __x86_64__
12934 extern __inline __m128
12935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12936 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12938 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12939 _MM_FROUND_CUR_DIRECTION);
12942 extern __inline __m128d
12943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12944 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12946 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12947 _MM_FROUND_CUR_DIRECTION);
12949 #endif
12951 extern __inline __m128
12952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12953 _mm_cvtu32_ss (__m128 __A, unsigned __B)
12955 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12956 _MM_FROUND_CUR_DIRECTION);
12959 extern __inline __m512
12960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12961 _mm512_cvtepi32_ps (__m512i __A)
12963 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12964 (__v16sf)
12965 _mm512_undefined_ps (),
12966 (__mmask16) -1,
12967 _MM_FROUND_CUR_DIRECTION);
12970 extern __inline __m512
12971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12972 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12974 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12975 (__v16sf) __W,
12976 (__mmask16) __U,
12977 _MM_FROUND_CUR_DIRECTION);
12980 extern __inline __m512
12981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12982 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12984 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12985 (__v16sf)
12986 _mm512_setzero_ps (),
12987 (__mmask16) __U,
12988 _MM_FROUND_CUR_DIRECTION);
12991 extern __inline __m512
12992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12993 _mm512_cvtepu32_ps (__m512i __A)
12995 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12996 (__v16sf)
12997 _mm512_undefined_ps (),
12998 (__mmask16) -1,
12999 _MM_FROUND_CUR_DIRECTION);
13002 extern __inline __m512
13003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13004 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
13006 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13007 (__v16sf) __W,
13008 (__mmask16) __U,
13009 _MM_FROUND_CUR_DIRECTION);
13012 extern __inline __m512
13013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13014 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
13016 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
13017 (__v16sf)
13018 _mm512_setzero_ps (),
13019 (__mmask16) __U,
13020 _MM_FROUND_CUR_DIRECTION);
13023 #ifdef __OPTIMIZE__
13024 extern __inline __m512d
13025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13026 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
13028 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13029 (__v8df) __B,
13030 (__v8di) __C,
13031 __imm,
13032 (__mmask8) -1,
13033 _MM_FROUND_CUR_DIRECTION);
13036 extern __inline __m512d
13037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13038 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
13039 __m512i __C, const int __imm)
13041 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
13042 (__v8df) __B,
13043 (__v8di) __C,
13044 __imm,
13045 (__mmask8) __U,
13046 _MM_FROUND_CUR_DIRECTION);
13049 extern __inline __m512d
13050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13051 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
13052 __m512i __C, const int __imm)
13054 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
13055 (__v8df) __B,
13056 (__v8di) __C,
13057 __imm,
13058 (__mmask8) __U,
13059 _MM_FROUND_CUR_DIRECTION);
13062 extern __inline __m512
13063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13064 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
13066 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13067 (__v16sf) __B,
13068 (__v16si) __C,
13069 __imm,
13070 (__mmask16) -1,
13071 _MM_FROUND_CUR_DIRECTION);
13074 extern __inline __m512
13075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13076 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
13077 __m512i __C, const int __imm)
13079 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
13080 (__v16sf) __B,
13081 (__v16si) __C,
13082 __imm,
13083 (__mmask16) __U,
13084 _MM_FROUND_CUR_DIRECTION);
13087 extern __inline __m512
13088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13089 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
13090 __m512i __C, const int __imm)
13092 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
13093 (__v16sf) __B,
13094 (__v16si) __C,
13095 __imm,
13096 (__mmask16) __U,
13097 _MM_FROUND_CUR_DIRECTION);
13100 extern __inline __m128d
13101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13102 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
13104 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13105 (__v2df) __B,
13106 (__v2di) __C, __imm,
13107 (__mmask8) -1,
13108 _MM_FROUND_CUR_DIRECTION);
13111 extern __inline __m128d
13112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13113 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
13114 __m128i __C, const int __imm)
13116 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
13117 (__v2df) __B,
13118 (__v2di) __C, __imm,
13119 (__mmask8) __U,
13120 _MM_FROUND_CUR_DIRECTION);
13123 extern __inline __m128d
13124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13125 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
13126 __m128i __C, const int __imm)
13128 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
13129 (__v2df) __B,
13130 (__v2di) __C,
13131 __imm,
13132 (__mmask8) __U,
13133 _MM_FROUND_CUR_DIRECTION);
13136 extern __inline __m128
13137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13138 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
13140 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13141 (__v4sf) __B,
13142 (__v4si) __C, __imm,
13143 (__mmask8) -1,
13144 _MM_FROUND_CUR_DIRECTION);
13147 extern __inline __m128
13148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13149 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
13150 __m128i __C, const int __imm)
13152 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
13153 (__v4sf) __B,
13154 (__v4si) __C, __imm,
13155 (__mmask8) __U,
13156 _MM_FROUND_CUR_DIRECTION);
13159 extern __inline __m128
13160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13161 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
13162 __m128i __C, const int __imm)
13164 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
13165 (__v4sf) __B,
13166 (__v4si) __C, __imm,
13167 (__mmask8) __U,
13168 _MM_FROUND_CUR_DIRECTION);
13170 #else
13171 #define _mm512_fixupimm_pd(X, Y, Z, C) \
13172 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13173 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13174 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13176 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
13177 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
13178 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13179 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13181 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
13182 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
13183 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
13184 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13186 #define _mm512_fixupimm_ps(X, Y, Z, C) \
13187 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13188 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13189 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
13191 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
13192 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
13193 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13194 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13196 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
13197 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
13198 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
13199 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13201 #define _mm_fixupimm_sd(X, Y, Z, C) \
13202 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13203 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13204 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13206 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
13207 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
13208 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13209 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13211 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
13212 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
13213 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
13214 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13216 #define _mm_fixupimm_ss(X, Y, Z, C) \
13217 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13218 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13219 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13221 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
13222 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
13223 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13224 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13226 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
13227 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
13228 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
13229 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13230 #endif
13232 #ifdef __x86_64__
13233 extern __inline unsigned long long
13234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13235 _mm_cvtss_u64 (__m128 __A)
13237 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
13238 __A,
13239 _MM_FROUND_CUR_DIRECTION);
13242 extern __inline unsigned long long
13243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13244 _mm_cvttss_u64 (__m128 __A)
13246 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
13247 __A,
13248 _MM_FROUND_CUR_DIRECTION);
13251 extern __inline long long
13252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13253 _mm_cvttss_i64 (__m128 __A)
13255 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
13256 _MM_FROUND_CUR_DIRECTION);
13258 #endif /* __x86_64__ */
13260 extern __inline unsigned
13261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13262 _mm_cvtss_u32 (__m128 __A)
13264 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
13265 _MM_FROUND_CUR_DIRECTION);
13268 extern __inline unsigned
13269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13270 _mm_cvttss_u32 (__m128 __A)
13272 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
13273 _MM_FROUND_CUR_DIRECTION);
13276 extern __inline int
13277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13278 _mm_cvttss_i32 (__m128 __A)
13280 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
13281 _MM_FROUND_CUR_DIRECTION);
13284 #ifdef __x86_64__
13285 extern __inline unsigned long long
13286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13287 _mm_cvtsd_u64 (__m128d __A)
13289 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
13290 __A,
13291 _MM_FROUND_CUR_DIRECTION);
13294 extern __inline unsigned long long
13295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13296 _mm_cvttsd_u64 (__m128d __A)
13298 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
13299 __A,
13300 _MM_FROUND_CUR_DIRECTION);
13303 extern __inline long long
13304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13305 _mm_cvttsd_i64 (__m128d __A)
13307 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
13308 _MM_FROUND_CUR_DIRECTION);
13310 #endif /* __x86_64__ */
13312 extern __inline unsigned
13313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13314 _mm_cvtsd_u32 (__m128d __A)
13316 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
13317 _MM_FROUND_CUR_DIRECTION);
13320 extern __inline unsigned
13321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13322 _mm_cvttsd_u32 (__m128d __A)
13324 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
13325 _MM_FROUND_CUR_DIRECTION);
13328 extern __inline int
13329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13330 _mm_cvttsd_i32 (__m128d __A)
13332 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
13333 _MM_FROUND_CUR_DIRECTION);
13336 extern __inline __m512d
13337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13338 _mm512_cvtps_pd (__m256 __A)
13340 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13341 (__v8df)
13342 _mm512_undefined_pd (),
13343 (__mmask8) -1,
13344 _MM_FROUND_CUR_DIRECTION);
13347 extern __inline __m512d
13348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13349 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
13351 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13352 (__v8df) __W,
13353 (__mmask8) __U,
13354 _MM_FROUND_CUR_DIRECTION);
13357 extern __inline __m512d
13358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13359 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
13361 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
13362 (__v8df)
13363 _mm512_setzero_pd (),
13364 (__mmask8) __U,
13365 _MM_FROUND_CUR_DIRECTION);
13368 extern __inline __m512
13369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13370 _mm512_cvtph_ps (__m256i __A)
13372 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13373 (__v16sf)
13374 _mm512_undefined_ps (),
13375 (__mmask16) -1,
13376 _MM_FROUND_CUR_DIRECTION);
13379 extern __inline __m512
13380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13381 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
13383 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13384 (__v16sf) __W,
13385 (__mmask16) __U,
13386 _MM_FROUND_CUR_DIRECTION);
13389 extern __inline __m512
13390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13391 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
13393 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
13394 (__v16sf)
13395 _mm512_setzero_ps (),
13396 (__mmask16) __U,
13397 _MM_FROUND_CUR_DIRECTION);
13400 extern __inline __m256
13401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13402 _mm512_cvtpd_ps (__m512d __A)
13404 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13405 (__v8sf)
13406 _mm256_undefined_ps (),
13407 (__mmask8) -1,
13408 _MM_FROUND_CUR_DIRECTION);
13411 extern __inline __m256
13412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13413 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
13415 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13416 (__v8sf) __W,
13417 (__mmask8) __U,
13418 _MM_FROUND_CUR_DIRECTION);
13421 extern __inline __m256
13422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13423 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
13425 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
13426 (__v8sf)
13427 _mm256_setzero_ps (),
13428 (__mmask8) __U,
13429 _MM_FROUND_CUR_DIRECTION);
13432 #ifdef __OPTIMIZE__
13433 extern __inline __m512
13434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13435 _mm512_getexp_ps (__m512 __A)
13437 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13438 (__v16sf)
13439 _mm512_undefined_ps (),
13440 (__mmask16) -1,
13441 _MM_FROUND_CUR_DIRECTION);
13444 extern __inline __m512
13445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13446 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
13448 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13449 (__v16sf) __W,
13450 (__mmask16) __U,
13451 _MM_FROUND_CUR_DIRECTION);
13454 extern __inline __m512
13455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13456 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
13458 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
13459 (__v16sf)
13460 _mm512_setzero_ps (),
13461 (__mmask16) __U,
13462 _MM_FROUND_CUR_DIRECTION);
13465 extern __inline __m512d
13466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13467 _mm512_getexp_pd (__m512d __A)
13469 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13470 (__v8df)
13471 _mm512_undefined_pd (),
13472 (__mmask8) -1,
13473 _MM_FROUND_CUR_DIRECTION);
13476 extern __inline __m512d
13477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13478 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
13480 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13481 (__v8df) __W,
13482 (__mmask8) __U,
13483 _MM_FROUND_CUR_DIRECTION);
13486 extern __inline __m512d
13487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13488 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
13490 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
13491 (__v8df)
13492 _mm512_setzero_pd (),
13493 (__mmask8) __U,
13494 _MM_FROUND_CUR_DIRECTION);
13497 extern __inline __m128
13498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13499 _mm_getexp_ss (__m128 __A, __m128 __B)
13501 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
13502 (__v4sf) __B,
13503 _MM_FROUND_CUR_DIRECTION);
13506 extern __inline __m128
13507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13508 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
13510 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13511 (__v4sf) __B,
13512 (__v4sf) __W,
13513 (__mmask8) __U,
13514 _MM_FROUND_CUR_DIRECTION);
13517 extern __inline __m128
13518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13519 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
13521 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
13522 (__v4sf) __B,
13523 (__v4sf)
13524 _mm_setzero_ps (),
13525 (__mmask8) __U,
13526 _MM_FROUND_CUR_DIRECTION);
13529 extern __inline __m128d
13530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13531 _mm_getexp_sd (__m128d __A, __m128d __B)
13533 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
13534 (__v2df) __B,
13535 _MM_FROUND_CUR_DIRECTION);
13538 extern __inline __m128d
13539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13540 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
13542 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13543 (__v2df) __B,
13544 (__v2df) __W,
13545 (__mmask8) __U,
13546 _MM_FROUND_CUR_DIRECTION);
13549 extern __inline __m128d
13550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13551 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
13553 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
13554 (__v2df) __B,
13555 (__v2df)
13556 _mm_setzero_pd (),
13557 (__mmask8) __U,
13558 _MM_FROUND_CUR_DIRECTION);
13561 extern __inline __m512d
13562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13563 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
13564 _MM_MANTISSA_SIGN_ENUM __C)
13566 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13567 (__C << 2) | __B,
13568 _mm512_undefined_pd (),
13569 (__mmask8) -1,
13570 _MM_FROUND_CUR_DIRECTION);
13573 extern __inline __m512d
13574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13575 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
13576 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13578 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13579 (__C << 2) | __B,
13580 (__v8df) __W, __U,
13581 _MM_FROUND_CUR_DIRECTION);
13584 extern __inline __m512d
13585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13586 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
13587 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13589 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
13590 (__C << 2) | __B,
13591 (__v8df)
13592 _mm512_setzero_pd (),
13593 __U,
13594 _MM_FROUND_CUR_DIRECTION);
13597 extern __inline __m512
13598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13599 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
13600 _MM_MANTISSA_SIGN_ENUM __C)
13602 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13603 (__C << 2) | __B,
13604 _mm512_undefined_ps (),
13605 (__mmask16) -1,
13606 _MM_FROUND_CUR_DIRECTION);
13609 extern __inline __m512
13610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13611 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
13612 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13614 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13615 (__C << 2) | __B,
13616 (__v16sf) __W, __U,
13617 _MM_FROUND_CUR_DIRECTION);
13620 extern __inline __m512
13621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13622 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
13623 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
13625 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
13626 (__C << 2) | __B,
13627 (__v16sf)
13628 _mm512_setzero_ps (),
13629 __U,
13630 _MM_FROUND_CUR_DIRECTION);
13633 extern __inline __m128d
13634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13635 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
13636 _MM_MANTISSA_SIGN_ENUM __D)
13638 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
13639 (__v2df) __B,
13640 (__D << 2) | __C,
13641 _MM_FROUND_CUR_DIRECTION);
13644 extern __inline __m128d
13645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13646 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
13647 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13649 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13650 (__v2df) __B,
13651 (__D << 2) | __C,
13652 (__v2df) __W,
13653 __U,
13654 _MM_FROUND_CUR_DIRECTION);
13657 extern __inline __m128d
13658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13659 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
13660 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13662 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
13663 (__v2df) __B,
13664 (__D << 2) | __C,
13665 (__v2df)
13666 _mm_setzero_pd(),
13667 __U,
13668 _MM_FROUND_CUR_DIRECTION);
13671 extern __inline __m128
13672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13673 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
13674 _MM_MANTISSA_SIGN_ENUM __D)
13676 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
13677 (__v4sf) __B,
13678 (__D << 2) | __C,
13679 _MM_FROUND_CUR_DIRECTION);
13682 extern __inline __m128
13683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13684 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
13685 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13687 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13688 (__v4sf) __B,
13689 (__D << 2) | __C,
13690 (__v4sf) __W,
13691 __U,
13692 _MM_FROUND_CUR_DIRECTION);
13695 extern __inline __m128
13696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13697 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
13698 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
13700 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
13701 (__v4sf) __B,
13702 (__D << 2) | __C,
13703 (__v4sf)
13704 _mm_setzero_ps(),
13705 __U,
13706 _MM_FROUND_CUR_DIRECTION);
13709 #else
13710 #define _mm512_getmant_pd(X, B, C) \
13711 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13712 (int)(((C)<<2) | (B)), \
13713 (__v8df)_mm512_undefined_pd(), \
13714 (__mmask8)-1,\
13715 _MM_FROUND_CUR_DIRECTION))
13717 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
13718 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13719 (int)(((C)<<2) | (B)), \
13720 (__v8df)(__m512d)(W), \
13721 (__mmask8)(U),\
13722 _MM_FROUND_CUR_DIRECTION))
13724 #define _mm512_maskz_getmant_pd(U, X, B, C) \
13725 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
13726 (int)(((C)<<2) | (B)), \
13727 (__v8df)_mm512_setzero_pd(), \
13728 (__mmask8)(U),\
13729 _MM_FROUND_CUR_DIRECTION))
13730 #define _mm512_getmant_ps(X, B, C) \
13731 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13732 (int)(((C)<<2) | (B)), \
13733 (__v16sf)_mm512_undefined_ps(), \
13734 (__mmask16)-1,\
13735 _MM_FROUND_CUR_DIRECTION))
13737 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
13738 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13739 (int)(((C)<<2) | (B)), \
13740 (__v16sf)(__m512)(W), \
13741 (__mmask16)(U),\
13742 _MM_FROUND_CUR_DIRECTION))
13744 #define _mm512_maskz_getmant_ps(U, X, B, C) \
13745 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
13746 (int)(((C)<<2) | (B)), \
13747 (__v16sf)_mm512_setzero_ps(), \
13748 (__mmask16)(U),\
13749 _MM_FROUND_CUR_DIRECTION))
13750 #define _mm_getmant_sd(X, Y, C, D) \
13751 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
13752 (__v2df)(__m128d)(Y), \
13753 (int)(((D)<<2) | (C)), \
13754 _MM_FROUND_CUR_DIRECTION))
13756 #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
13757 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
13758 (__v2df)(__m128d)(Y), \
13759 (int)(((D)<<2) | (C)), \
13760 (__v2df)(__m128d)(W), \
13761 (__mmask8)(U),\
13762 _MM_FROUND_CUR_DIRECTION))
13764 #define _mm_maskz_getmant_sd(U, X, Y, C, D) \
13765 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
13766 (__v2df)(__m128d)(Y), \
13767 (int)(((D)<<2) | (C)), \
13768 (__v2df)_mm_setzero_pd(), \
13769 (__mmask8)(U),\
13770 _MM_FROUND_CUR_DIRECTION))
13772 #define _mm_getmant_ss(X, Y, C, D) \
13773 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
13774 (__v4sf)(__m128)(Y), \
13775 (int)(((D)<<2) | (C)), \
13776 _MM_FROUND_CUR_DIRECTION))
13778 #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
13779 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
13780 (__v4sf)(__m128)(Y), \
13781 (int)(((D)<<2) | (C)), \
13782 (__v4sf)(__m128)(W), \
13783 (__mmask8)(U),\
13784 _MM_FROUND_CUR_DIRECTION))
13786 #define _mm_maskz_getmant_ss(U, X, Y, C, D) \
13787 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
13788 (__v4sf)(__m128)(Y), \
13789 (int)(((D)<<2) | (C)), \
13790 (__v4sf)_mm_setzero_ps(), \
13791 (__mmask8)(U),\
13792 _MM_FROUND_CUR_DIRECTION))
13794 #define _mm_getexp_ss(A, B) \
13795 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
13796 _MM_FROUND_CUR_DIRECTION))
13798 #define _mm_mask_getexp_ss(W, U, A, B) \
13799 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
13800 _MM_FROUND_CUR_DIRECTION)
13802 #define _mm_maskz_getexp_ss(U, A, B) \
13803 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
13804 _MM_FROUND_CUR_DIRECTION)
13806 #define _mm_getexp_sd(A, B) \
13807 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
13808 _MM_FROUND_CUR_DIRECTION))
13810 #define _mm_mask_getexp_sd(W, U, A, B) \
13811 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
13812 _MM_FROUND_CUR_DIRECTION)
13814 #define _mm_maskz_getexp_sd(U, A, B) \
13815 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
13816 _MM_FROUND_CUR_DIRECTION)
13818 #define _mm512_getexp_ps(A) \
13819 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13820 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
13822 #define _mm512_mask_getexp_ps(W, U, A) \
13823 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13824 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13826 #define _mm512_maskz_getexp_ps(U, A) \
13827 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
13828 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
13830 #define _mm512_getexp_pd(A) \
13831 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13832 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
13834 #define _mm512_mask_getexp_pd(W, U, A) \
13835 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13836 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13838 #define _mm512_maskz_getexp_pd(U, A) \
13839 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
13840 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
13841 #endif
13843 #ifdef __OPTIMIZE__
13844 extern __inline __m512
13845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13846 _mm512_roundscale_ps (__m512 __A, const int __imm)
13848 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
13849 (__v16sf)
13850 _mm512_undefined_ps (),
13852 _MM_FROUND_CUR_DIRECTION);
13855 extern __inline __m512
13856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13857 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
13858 const int __imm)
13860 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
13861 (__v16sf) __A,
13862 (__mmask16) __B,
13863 _MM_FROUND_CUR_DIRECTION);
13866 extern __inline __m512
13867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13868 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
13870 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
13871 __imm,
13872 (__v16sf)
13873 _mm512_setzero_ps (),
13874 (__mmask16) __A,
13875 _MM_FROUND_CUR_DIRECTION);
13878 extern __inline __m512d
13879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13880 _mm512_roundscale_pd (__m512d __A, const int __imm)
13882 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
13883 (__v8df)
13884 _mm512_undefined_pd (),
13886 _MM_FROUND_CUR_DIRECTION);
13889 extern __inline __m512d
13890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13891 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
13892 const int __imm)
13894 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
13895 (__v8df) __A,
13896 (__mmask8) __B,
13897 _MM_FROUND_CUR_DIRECTION);
13900 extern __inline __m512d
13901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13902 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
13904 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
13905 __imm,
13906 (__v8df)
13907 _mm512_setzero_pd (),
13908 (__mmask8) __A,
13909 _MM_FROUND_CUR_DIRECTION);
13912 extern __inline __m128
13913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13914 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
13916 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
13917 (__v4sf) __B, __imm,
13918 _MM_FROUND_CUR_DIRECTION);
13921 extern __inline __m128d
13922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13923 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
13925 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
13926 (__v2df) __B, __imm,
13927 _MM_FROUND_CUR_DIRECTION);
13930 #else
13931 #define _mm512_roundscale_ps(A, B) \
13932 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
13933 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
13934 #define _mm512_mask_roundscale_ps(A, B, C, D) \
13935 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
13936 (int)(D), \
13937 (__v16sf)(__m512)(A), \
13938 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
13939 #define _mm512_maskz_roundscale_ps(A, B, C) \
13940 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
13941 (int)(C), \
13942 (__v16sf)_mm512_setzero_ps(),\
13943 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
13944 #define _mm512_roundscale_pd(A, B) \
13945 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
13946 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
13947 #define _mm512_mask_roundscale_pd(A, B, C, D) \
13948 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
13949 (int)(D), \
13950 (__v8df)(__m512d)(A), \
13951 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
13952 #define _mm512_maskz_roundscale_pd(A, B, C) \
13953 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
13954 (int)(C), \
13955 (__v8df)_mm512_setzero_pd(),\
13956 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
13957 #define _mm_roundscale_ss(A, B, C) \
13958 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
13959 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
13960 #define _mm_roundscale_sd(A, B, C) \
13961 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
13962 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
13963 #endif
13965 #ifdef __OPTIMIZE__
13966 extern __inline __mmask8
13967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13968 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
13970 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
13971 (__v8df) __Y, __P,
13972 (__mmask8) -1,
13973 _MM_FROUND_CUR_DIRECTION);
13976 extern __inline __mmask16
13977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13978 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
13980 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
13981 (__v16sf) __Y, __P,
13982 (__mmask16) -1,
13983 _MM_FROUND_CUR_DIRECTION);
13986 extern __inline __mmask16
13987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13988 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
13990 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
13991 (__v16sf) __Y, __P,
13992 (__mmask16) __U,
13993 _MM_FROUND_CUR_DIRECTION);
13996 extern __inline __mmask8
13997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13998 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
14000 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
14001 (__v8df) __Y, __P,
14002 (__mmask8) __U,
14003 _MM_FROUND_CUR_DIRECTION);
14006 extern __inline __mmask8
14007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14008 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
14010 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14011 (__v2df) __Y, __P,
14012 (__mmask8) -1,
14013 _MM_FROUND_CUR_DIRECTION);
14016 extern __inline __mmask8
14017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14018 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
14020 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
14021 (__v2df) __Y, __P,
14022 (__mmask8) __M,
14023 _MM_FROUND_CUR_DIRECTION);
14026 extern __inline __mmask8
14027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14028 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
14030 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14031 (__v4sf) __Y, __P,
14032 (__mmask8) -1,
14033 _MM_FROUND_CUR_DIRECTION);
14036 extern __inline __mmask8
14037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14038 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
14040 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
14041 (__v4sf) __Y, __P,
14042 (__mmask8) __M,
14043 _MM_FROUND_CUR_DIRECTION);
14046 #else
14047 #define _mm512_cmp_pd_mask(X, Y, P) \
14048 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
14049 (__v8df)(__m512d)(Y), (int)(P),\
14050 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
14052 #define _mm512_cmp_ps_mask(X, Y, P) \
14053 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
14054 (__v16sf)(__m512)(Y), (int)(P),\
14055 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
14057 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
14058 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
14059 (__v8df)(__m512d)(Y), (int)(P),\
14060 (__mmask8)M, _MM_FROUND_CUR_DIRECTION))
14062 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
14063 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
14064 (__v16sf)(__m512)(Y), (int)(P),\
14065 (__mmask16)M,_MM_FROUND_CUR_DIRECTION))
14067 #define _mm_cmp_sd_mask(X, Y, P) \
14068 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
14069 (__v2df)(__m128d)(Y), (int)(P),\
14070 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
14072 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
14073 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
14074 (__v2df)(__m128d)(Y), (int)(P),\
14075 M,_MM_FROUND_CUR_DIRECTION))
14077 #define _mm_cmp_ss_mask(X, Y, P) \
14078 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
14079 (__v4sf)(__m128)(Y), (int)(P), \
14080 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
14082 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
14083 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
14084 (__v4sf)(__m128)(Y), (int)(P), \
14085 M,_MM_FROUND_CUR_DIRECTION))
14086 #endif
14088 extern __inline __mmask16
14089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14090 _mm512_kmov (__mmask16 __A)
14092 return __builtin_ia32_kmovw (__A);
14095 extern __inline __m512
14096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14097 _mm512_castpd_ps (__m512d __A)
14099 return (__m512) (__A);
14102 extern __inline __m512i
14103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14104 _mm512_castpd_si512 (__m512d __A)
14106 return (__m512i) (__A);
14109 extern __inline __m512d
14110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14111 _mm512_castps_pd (__m512 __A)
14113 return (__m512d) (__A);
14116 extern __inline __m512i
14117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14118 _mm512_castps_si512 (__m512 __A)
14120 return (__m512i) (__A);
14123 extern __inline __m512
14124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14125 _mm512_castsi512_ps (__m512i __A)
14127 return (__m512) (__A);
14130 extern __inline __m512d
14131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14132 _mm512_castsi512_pd (__m512i __A)
14134 return (__m512d) (__A);
14137 extern __inline __m128d
14138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14139 _mm512_castpd512_pd128 (__m512d __A)
14141 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
14144 extern __inline __m128
14145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14146 _mm512_castps512_ps128 (__m512 __A)
14148 return _mm512_extractf32x4_ps(__A, 0);
14151 extern __inline __m128i
14152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14153 _mm512_castsi512_si128 (__m512i __A)
14155 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
14158 extern __inline __m256d
14159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14160 _mm512_castpd512_pd256 (__m512d __A)
14162 return _mm512_extractf64x4_pd(__A, 0);
14165 extern __inline __m256
14166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14167 _mm512_castps512_ps256 (__m512 __A)
14169 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
14172 extern __inline __m256i
14173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14174 _mm512_castsi512_si256 (__m512i __A)
14176 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
14179 extern __inline __m512d
14180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14181 _mm512_castpd128_pd512 (__m128d __A)
14183 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
14186 extern __inline __m512
14187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14188 _mm512_castps128_ps512 (__m128 __A)
14190 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
14193 extern __inline __m512i
14194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14195 _mm512_castsi128_si512 (__m128i __A)
14197 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
14200 extern __inline __m512d
14201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14202 _mm512_castpd256_pd512 (__m256d __A)
14204 return __builtin_ia32_pd512_256pd (__A);
14207 extern __inline __m512
14208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14209 _mm512_castps256_ps512 (__m256 __A)
14211 return __builtin_ia32_ps512_256ps (__A);
14214 extern __inline __m512i
14215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14216 _mm512_castsi256_si512 (__m256i __A)
14218 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
14221 extern __inline __mmask16
14222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14223 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
14225 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14226 (__v16si) __B, 0,
14227 (__mmask16) -1);
14230 extern __inline __mmask16
14231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14232 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14234 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14235 (__v16si) __B, 0, __U);
14238 extern __inline __mmask8
14239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14240 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14242 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14243 (__v8di) __B, 0, __U);
14246 extern __inline __mmask8
14247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14248 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
14250 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14251 (__v8di) __B, 0,
14252 (__mmask8) -1);
14255 extern __inline __mmask16
14256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14257 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
14259 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14260 (__v16si) __B, 6,
14261 (__mmask16) -1);
14264 extern __inline __mmask16
14265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14266 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
14268 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
14269 (__v16si) __B, 6, __U);
14272 extern __inline __mmask8
14273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14274 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
14276 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14277 (__v8di) __B, 6, __U);
14280 extern __inline __mmask8
14281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14282 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
14284 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
14285 (__v8di) __B, 6,
14286 (__mmask8) -1);
14289 #undef __MM512_REDUCE_OP
14290 #define __MM512_REDUCE_OP(op) \
14291 __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
14292 __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
14293 __m256i __T3 = (__m256i) (__T1 op __T2); \
14294 __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
14295 __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
14296 __v4si __T6 = __T4 op __T5; \
14297 __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14298 __v4si __T8 = __T6 op __T7; \
14299 return __T8[0] op __T8[1]
14301 extern __inline int
14302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14303 _mm512_reduce_add_epi32 (__m512i __A)
14305 __MM512_REDUCE_OP (+);
14308 extern __inline int
14309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14310 _mm512_reduce_mul_epi32 (__m512i __A)
14312 __MM512_REDUCE_OP (*);
14315 extern __inline int
14316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14317 _mm512_reduce_and_epi32 (__m512i __A)
14319 __MM512_REDUCE_OP (&);
14322 extern __inline int
14323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14324 _mm512_reduce_or_epi32 (__m512i __A)
14326 __MM512_REDUCE_OP (|);
14329 extern __inline int
14330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14331 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
14333 __A = _mm512_maskz_mov_epi32 (__U, __A);
14334 __MM512_REDUCE_OP (+);
14337 extern __inline int
14338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14339 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
14341 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
14342 __MM512_REDUCE_OP (*);
14345 extern __inline int
14346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14347 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
14349 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14350 __MM512_REDUCE_OP (&);
14353 extern __inline int
14354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14355 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
14357 __A = _mm512_maskz_mov_epi32 (__U, __A);
14358 __MM512_REDUCE_OP (|);
14361 #undef __MM512_REDUCE_OP
14362 #define __MM512_REDUCE_OP(op) \
14363 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
14364 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
14365 __m256i __T3 = _mm256_##op (__T1, __T2); \
14366 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
14367 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
14368 __m128i __T6 = _mm_##op (__T4, __T5); \
14369 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
14370 (__v4si) { 2, 3, 0, 1 }); \
14371 __m128i __T8 = _mm_##op (__T6, __T7); \
14372 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
14373 (__v4si) { 1, 0, 1, 0 }); \
14374 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
14375 return __T10[0]
14377 extern __inline int
14378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14379 _mm512_reduce_min_epi32 (__m512i __A)
14381 __MM512_REDUCE_OP (min_epi32);
14384 extern __inline int
14385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14386 _mm512_reduce_max_epi32 (__m512i __A)
14388 __MM512_REDUCE_OP (max_epi32);
14391 extern __inline unsigned int
14392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14393 _mm512_reduce_min_epu32 (__m512i __A)
14395 __MM512_REDUCE_OP (min_epu32);
14398 extern __inline unsigned int
14399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14400 _mm512_reduce_max_epu32 (__m512i __A)
14402 __MM512_REDUCE_OP (max_epu32);
14405 extern __inline int
14406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14407 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
14409 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
14410 __MM512_REDUCE_OP (min_epi32);
14413 extern __inline int
14414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14415 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
14417 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
14418 __MM512_REDUCE_OP (max_epi32);
14421 extern __inline unsigned int
14422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14423 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
14425 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
14426 __MM512_REDUCE_OP (min_epu32);
14429 extern __inline unsigned int
14430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14431 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
14433 __A = _mm512_maskz_mov_epi32 (__U, __A);
14434 __MM512_REDUCE_OP (max_epu32);
14437 #undef __MM512_REDUCE_OP
14438 #define __MM512_REDUCE_OP(op) \
14439 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14440 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14441 __m256 __T3 = __T1 op __T2; \
14442 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14443 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14444 __m128 __T6 = __T4 op __T5; \
14445 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14446 __m128 __T8 = __T6 op __T7; \
14447 return __T8[0] op __T8[1]
14449 extern __inline float
14450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14451 _mm512_reduce_add_ps (__m512 __A)
14453 __MM512_REDUCE_OP (+);
14456 extern __inline float
14457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14458 _mm512_reduce_mul_ps (__m512 __A)
14460 __MM512_REDUCE_OP (*);
14463 extern __inline float
14464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14465 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
14467 __A = _mm512_maskz_mov_ps (__U, __A);
14468 __MM512_REDUCE_OP (+);
14471 extern __inline float
14472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14473 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
14475 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
14476 __MM512_REDUCE_OP (*);
14479 #undef __MM512_REDUCE_OP
14480 #define __MM512_REDUCE_OP(op) \
14481 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
14482 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
14483 __m256 __T3 = _mm256_##op (__T1, __T2); \
14484 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
14485 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
14486 __m128 __T6 = _mm_##op (__T4, __T5); \
14487 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
14488 __m128 __T8 = _mm_##op (__T6, __T7); \
14489 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
14490 __m128 __T10 = _mm_##op (__T8, __T9); \
14491 return __T10[0]
14493 extern __inline float
14494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14495 _mm512_reduce_min_ps (__m512 __A)
14497 __MM512_REDUCE_OP (min_ps);
14500 extern __inline float
14501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14502 _mm512_reduce_max_ps (__m512 __A)
14504 __MM512_REDUCE_OP (max_ps);
14507 extern __inline float
14508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14509 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
14511 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
14512 __MM512_REDUCE_OP (min_ps);
14515 extern __inline float
14516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14517 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
14519 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
14520 __MM512_REDUCE_OP (max_ps);
14523 #undef __MM512_REDUCE_OP
14524 #define __MM512_REDUCE_OP(op) \
14525 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
14526 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
14527 __m256i __T3 = (__m256i) (__T1 op __T2); \
14528 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
14529 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
14530 __v2di __T6 = __T4 op __T5; \
14531 return __T6[0] op __T6[1]
14533 extern __inline long long
14534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14535 _mm512_reduce_add_epi64 (__m512i __A)
14537 __MM512_REDUCE_OP (+);
14540 extern __inline long long
14541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14542 _mm512_reduce_mul_epi64 (__m512i __A)
14544 __MM512_REDUCE_OP (*);
14547 extern __inline long long
14548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14549 _mm512_reduce_and_epi64 (__m512i __A)
14551 __MM512_REDUCE_OP (&);
14554 extern __inline long long
14555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14556 _mm512_reduce_or_epi64 (__m512i __A)
14558 __MM512_REDUCE_OP (|);
14561 extern __inline long long
14562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14563 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
14565 __A = _mm512_maskz_mov_epi64 (__U, __A);
14566 __MM512_REDUCE_OP (+);
14569 extern __inline long long
14570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14571 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
14573 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
14574 __MM512_REDUCE_OP (*);
14577 extern __inline long long
14578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14579 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
14581 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
14582 __MM512_REDUCE_OP (&);
14585 extern __inline long long
14586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14587 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
14589 __A = _mm512_maskz_mov_epi64 (__U, __A);
14590 __MM512_REDUCE_OP (|);
14593 #undef __MM512_REDUCE_OP
14594 #define __MM512_REDUCE_OP(op) \
14595 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
14596 __m512i __T2 = _mm512_##op (__A, __T1); \
14597 __m512i __T3 \
14598 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
14599 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
14600 __m512i __T4 = _mm512_##op (__T2, __T3); \
14601 __m512i __T5 \
14602 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
14603 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
14604 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
14605 return __T6[0]
14607 extern __inline long long
14608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14609 _mm512_reduce_min_epi64 (__m512i __A)
14611 __MM512_REDUCE_OP (min_epi64);
14614 extern __inline long long
14615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14616 _mm512_reduce_max_epi64 (__m512i __A)
14618 __MM512_REDUCE_OP (max_epi64);
14621 extern __inline long long
14622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14623 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
14625 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
14626 __U, __A);
14627 __MM512_REDUCE_OP (min_epi64);
14630 extern __inline long long
14631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14632 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
14634 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
14635 __U, __A);
14636 __MM512_REDUCE_OP (max_epi64);
14639 extern __inline unsigned long long
14640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14641 _mm512_reduce_min_epu64 (__m512i __A)
14643 __MM512_REDUCE_OP (min_epu64);
14646 extern __inline unsigned long long
14647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14648 _mm512_reduce_max_epu64 (__m512i __A)
14650 __MM512_REDUCE_OP (max_epu64);
14653 extern __inline unsigned long long
14654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14655 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
14657 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
14658 __MM512_REDUCE_OP (min_epu64);
14661 extern __inline unsigned long long
14662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14663 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
14665 __A = _mm512_maskz_mov_epi64 (__U, __A);
14666 __MM512_REDUCE_OP (max_epu64);
14669 #undef __MM512_REDUCE_OP
14670 #define __MM512_REDUCE_OP(op) \
14671 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
14672 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
14673 __m256d __T3 = __T1 op __T2; \
14674 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
14675 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
14676 __m128d __T6 = __T4 op __T5; \
14677 return __T6[0] op __T6[1]
14679 extern __inline double
14680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14681 _mm512_reduce_add_pd (__m512d __A)
14683 __MM512_REDUCE_OP (+);
14686 extern __inline double
14687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14688 _mm512_reduce_mul_pd (__m512d __A)
14690 __MM512_REDUCE_OP (*);
14693 extern __inline double
14694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14695 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
14697 __A = _mm512_maskz_mov_pd (__U, __A);
14698 __MM512_REDUCE_OP (+);
14701 extern __inline double
14702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14703 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
14705 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
14706 __MM512_REDUCE_OP (*);
14709 #undef __MM512_REDUCE_OP
14710 #define __MM512_REDUCE_OP(op) \
14711 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
14712 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
14713 __m256d __T3 = _mm256_##op (__T1, __T2); \
14714 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
14715 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
14716 __m128d __T6 = _mm_##op (__T4, __T5); \
14717 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
14718 __m128d __T8 = _mm_##op (__T6, __T7); \
14719 return __T8[0]
14721 extern __inline double
14722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14723 _mm512_reduce_min_pd (__m512d __A)
14725 __MM512_REDUCE_OP (min_pd);
14728 extern __inline double
14729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14730 _mm512_reduce_max_pd (__m512d __A)
14732 __MM512_REDUCE_OP (max_pd);
14735 extern __inline double
14736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14737 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
14739 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
14740 __MM512_REDUCE_OP (min_pd);
14743 extern __inline double
14744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14745 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
14747 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
14748 __MM512_REDUCE_OP (max_pd);
14751 #undef __MM512_REDUCE_OP
14753 #ifdef __DISABLE_AVX512F__
14754 #undef __DISABLE_AVX512F__
14755 #pragma GCC pop_options
14756 #endif /* __DISABLE_AVX512F__ */
14758 #endif /* _AVX512FINTRIN_H_INCLUDED */